Merge branch 'sun9i-a80-spl' of http://git.denx.de/u-boot-sunxi
diff --git a/arch/arm/include/asm/arch-sunxi/clock_sun9i.h b/arch/arm/include/asm/arch-sunxi/clock_sun9i.h
index a61934f..0aeb640 100644
--- a/arch/arm/include/asm/arch-sunxi/clock_sun9i.h
+++ b/arch/arm/include/asm/arch-sunxi/clock_sun9i.h
@@ -37,64 +37,65 @@
 	u8 reserved3[0x04];	/* 0x7c */
 	u32 ats_cfg;		/* 0x80 ats clock configuration */
 	u32 trace_cfg;		/* 0x84 trace clock configuration */
-	u8 reserved4[0xf8];	/* 0x88 */
+	u8 reserved4[0x14];     /* 0x88 */
+	u32 pll_stable_status;  /* 0x9c */
+	u8 reserved5[0xe0];	/* 0xa0 */
 	u32 clk_output_a;	/* 0x180 clk_output_a */
 	u32 clk_output_b;	/* 0x184 clk_output_a */
-	u8 reserved5[0x278];	/* 0x188 */
+	u8 reserved6[0x278];	/* 0x188 */
 
 	u32 nand0_clk_cfg;	/* 0x400 nand0 clock configuration0 */
 	u32 nand0_clk_cfg1;	/* 0x404 nand1 clock configuration */
-	u8 reserved6[0x08];	/* 0x408 */
+	u8 reserved7[0x08];	/* 0x408 */
 	u32 sd0_clk_cfg;	/* 0x410 sd0 clock configuration */
 	u32 sd1_clk_cfg;	/* 0x414 sd1 clock configuration */
 	u32 sd2_clk_cfg;	/* 0x418 sd2 clock configuration */
 	u32 sd3_clk_cfg;	/* 0x41c sd3 clock configuration */
-	u8 reserved7[0x08];	/* 0x420 */
+	u8 reserved8[0x08];	/* 0x420 */
 	u32 ts_clk_cfg;		/* 0x428 transport stream clock cfg */
 	u32 ss_clk_cfg;		/* 0x42c security system clock cfg */
 	u32 spi0_clk_cfg;	/* 0x430 spi0 clock configuration */
 	u32 spi1_clk_cfg;	/* 0x434 spi1 clock configuration */
 	u32 spi2_clk_cfg;	/* 0x438 spi2 clock configuration */
 	u32 spi3_clk_cfg;	/* 0x43c spi3 clock configuration */
-	u8 reserved8[0x50];	/* 0x440 */
+	u8 reserved9[0x44];	/* 0x440 */
+	u32 dram_clk_cfg;       /* 0x484 DRAM (controller) clock config */
+	u8 reserved10[0x8];     /* 0x488 */
 	u32 de_clk_cfg;		/* 0x490 display engine clock configuration */
-	u8 reserved9[0x04];	/* 0x494 */
+	u8 reserved11[0x04];	/* 0x494 */
 	u32 mp_clk_cfg;		/* 0x498 mp clock configuration */
 	u32 lcd0_clk_cfg;	/* 0x49c LCD0 module clock */
 	u32 lcd1_clk_cfg;	/* 0x4a0 LCD1 module clock */
-	u8 reserved10[0x1c];	/* 0x4a4 */
+	u8 reserved12[0x1c];	/* 0x4a4 */
 	u32 csi_isp_clk_cfg;	/* 0x4c0 CSI ISP module clock */
 	u32 csi0_clk_cfg;	/* 0x4c4 CSI0 module clock */
 	u32 csi1_clk_cfg;	/* 0x4c8 CSI1 module clock */
 	u32 fd_clk_cfg;		/* 0x4cc FD module clock */
 	u32 ve_clk_cfg;		/* 0x4d0 VE module clock */
 	u32 avs_clk_cfg;	/* 0x4d4 AVS module clock */
-	u8 reserved11[0x18];	/* 0x4d8 */
+	u8 reserved13[0x18];	/* 0x4d8 */
 	u32 gpu_core_clk_cfg;	/* 0x4f0 GPU core clock config */
 	u32 gpu_mem_clk_cfg;	/* 0x4f4 GPU memory clock config */
 	u32 gpu_axi_clk_cfg;	/* 0x4f8 GPU AXI clock config */
-	u8 reserved12[0x10];	/* 0x4fc */
+	u8 reserved14[0x10];	/* 0x4fc */
 	u32 gp_adc_clk_cfg;	/* 0x50c General Purpose ADC clk config */
-	u8 reserved13[0x70];	/* 0x510 */
+	u8 reserved15[0x70];	/* 0x510 */
 
 	u32 ahb_gate0;		/* 0x580 AHB0 Gating Register */
 	u32 ahb_gate1;		/* 0x584 AHB1 Gating Register */
 	u32 ahb_gate2;		/* 0x588 AHB2 Gating Register */
-	u8 reserved14[0x04];	/* 0x58c */
+	u8 reserved16[0x04];	/* 0x58c */
 	u32 apb0_gate;		/* 0x590 APB0 Clock Gating Register */
 	u32 apb1_gate;		/* 0x594 APB1 Clock Gating Register */
-	u8 reserved15[0x08];	/* 0x598 */
+	u8 reserved17[0x08];	/* 0x598 */
 	u32 ahb_reset0_cfg;	/* 0x5a0 AHB0 Software Reset Register */
 	u32 ahb_reset1_cfg;	/* 0x5a4 AHB1 Software Reset Register */
 	u32 ahb_reset2_cfg;	/* 0x5a8 AHB2 Software Reset Register */
-	u8 reserved16[0x04];	/* 0x5ac */
+	u8 reserved18[0x04];	/* 0x5ac */
 	u32 apb0_reset_cfg;	/* 0x5b0 Bus Software Reset Register 3 */
 	u32 apb1_reset_cfg;	/* 0x5b4 Bus Software Reset Register 4 */
 };
 
-/* pll4_periph0_cfg */
-#define PLL4_CFG_DEFAULT		0x90002800 /* 960 MHz */
-
 #define CCM_PLL4_CTRL_N_SHIFT		8
 #define CCM_PLL4_CTRL_N_MASK		(0xff << CCM_PLL4_CTRL_N_SHIFT)
 #define CCM_PLL4_CTRL_P_SHIFT		16
@@ -102,6 +103,80 @@
 #define CCM_PLL4_CTRL_M_SHIFT		18
 #define CCM_PLL4_CTRL_M_MASK		(0x1 << CCM_PLL4_CTRL_M_SHIFT)
 
+/* pllx_cfg bits */
+#define CCM_PLL1_CTRL_N(n)		(((n) & 0xff) << 8)
+#define CCM_PLL1_CTRL_P(n)		(((n) & 0x1) << 16)
+#define CCM_PLL1_CTRL_EN		(1 << 31)
+#define CCM_PLL1_CLOCK_TIME_2		(2 << 24)
+
+#define CCM_PLL2_CTRL_N(n)		(((n) & 0xff) << 8)
+#define CCM_PLL2_CTRL_P(n)		(((n) & 0x1) << 16)
+#define CCM_PLL2_CTRL_EN		(1 << 31)
+#define CCM_PLL2_CLOCK_TIME_2		(2 << 24)
+
+#define CCM_PLL4_CTRL_N(n)		(((n) & 0xff) << 8)
+#define CCM_PLL4_CTRL_EN		(1 << 31)
+
+#define CCM_PLL6_CTRL_N(n)		(((n) & 0xff) << 8)
+#define CCM_PLL6_CTRL_P(p)		(((p) & 0x1) << 16)
+#define CCM_PLL6_CTRL_EN		(1 << 31)
+#define CCM_PLL6_CFG_UPDATE             (1 << 30)
+
+#define CCM_PLL12_CTRL_N(n)		(((n) & 0xff) << 8)
+#define CCM_PLL12_CTRL_EN		(1 << 31)
+
+#define PLL_C0CPUX_STATUS               (1 << 0)
+#define PLL_C1CPUX_STATUS               (1 << 1)
+#define PLL_DDR_STATUS                  (1 << 5)
+#define PLL_PERIPH1_STATUS              (1 << 11)
+
+/* cpu_clk_source bits */
+#define C0_CPUX_CLK_SRC_SHIFT           0
+#define C1_CPUX_CLK_SRC_SHIFT           8
+#define C0_CPUX_CLK_SRC_MASK            (1 << C0_CPUX_CLK_SRC_SHIFT)
+#define C1_CPUX_CLK_SRC_MASK            (1 << C1_CPUX_CLK_SRC_SHIFT)
+#define C0_CPUX_CLK_SRC_OSC24M		(0 << C0_CPUX_CLK_SRC_SHIFT)
+#define C0_CPUX_CLK_SRC_PLL1		(1 << C0_CPUX_CLK_SRC_SHIFT)
+#define C1_CPUX_CLK_SRC_OSC24M		(0 << C1_CPUX_CLK_SRC_SHIFT)
+#define C1_CPUX_CLK_SRC_PLL2		(1 << C1_CPUX_CLK_SRC_SHIFT)
+
+/* c0_cfg: n is the divider (1..4); the 2-bit field stores n - 1 */
+#define C0_CFG_AXI0_CLK_DIV_RATIO(n)    ((((n) - 1) & 0x3) << 0)
+#define C0_CFG_APB0_CLK_DIV_RATIO(n)    ((((n) - 1) & 0x3) << 8)
+
+/* ahbx_cfg */
+#define AHBx_SRC_CLK_SELECT_SHIFT       24
+#define AHBx_SRC_MASK                   (0x3 << AHBx_SRC_CLK_SELECT_SHIFT)
+#define AHB0_SRC_GTBUS_CLK              (0x0 << AHBx_SRC_CLK_SELECT_SHIFT)
+#define AHB1_SRC_GTBUS_CLK              (0x0 << AHBx_SRC_CLK_SELECT_SHIFT)
+#define AHB2_SRC_OSC24M                 (0x0 << AHBx_SRC_CLK_SELECT_SHIFT)
+#define AHBx_SRC_PLL_PERIPH0            (0x1 << AHBx_SRC_CLK_SELECT_SHIFT)
+#define AHBx_SRC_PLL_PERIPH1            (0x2 << AHBx_SRC_CLK_SELECT_SHIFT)
+#define AHBx_CLK_DIV_RATIO(n)           (((ffs(n) - 1) & 0x3) << 0)
+
+/* apb0_cfg */
+#define APB0_SRC_CLK_SELECT_SHIFT       24
+#define APB0_SRC_MASK                   (0x1 << APB0_SRC_CLK_SELECT_SHIFT)
+#define APB0_SRC_OSC24M                 (0x0 << APB0_SRC_CLK_SELECT_SHIFT)
+#define APB0_SRC_PLL_PERIPH0            (0x1 << APB0_SRC_CLK_SELECT_SHIFT)
+#define APB0_CLK_DIV_RATIO(n)           (((ffs(n) - 1) & 0x3) << 0)
+
+/* gtbus_clk_cfg: source select in bits 25:24, divider (n - 1) in bits 1:0 */
+#define GTBUS_SRC_CLK_SELECT_SHIFT      24
+#define GTBUS_SRC_MASK                  (0x3 << GTBUS_SRC_CLK_SELECT_SHIFT)
+#define GTBUS_SRC_OSC24M                (0x0 << GTBUS_SRC_CLK_SELECT_SHIFT)
+#define GTBUS_SRC_PLL_PERIPH0           (0x1 << GTBUS_SRC_CLK_SELECT_SHIFT)
+#define GTBUS_SRC_PLL_PERIPH1           (0x2 << GTBUS_SRC_CLK_SELECT_SHIFT)
+#define GTBUS_CLK_DIV_RATIO(n)          ((((n) - 1) & 0x3) << 0)
+
+/* cci400_clk_cfg: source select in bits 25:24, divider (n - 1) in bits 1:0 */
+#define CCI400_SRC_CLK_SELECT_SHIFT     24
+#define CCI400_SRC_MASK                 (0x3 << CCI400_SRC_CLK_SELECT_SHIFT)
+#define CCI400_SRC_OSC24M               (0x0 << CCI400_SRC_CLK_SELECT_SHIFT)
+#define CCI400_SRC_PLL_PERIPH0          (0x1 << CCI400_SRC_CLK_SELECT_SHIFT)
+#define CCI400_SRC_PLL_PERIPH1          (0x2 << CCI400_SRC_CLK_SELECT_SHIFT)
+#define CCI400_CLK_DIV_RATIO(n)         ((((n) - 1) & 0x3) << 0)
+
 /* sd#_clk_cfg fields */
 #define CCM_MMC_CTRL_M(x)		((x) - 1)
 #define CCM_MMC_CTRL_OCLK_DLY(x)	((x) << 8)
@@ -112,6 +187,8 @@
 #define CCM_MMC_CTRL_ENABLE		(1 << 31)
 
 /* ahb_gate0 fields */
+#define AHB_GATE_OFFSET_MCTL		14
+
 /* On sun9i all sdc-s share their ahb gate, so ignore (x) */
 #define AHB_GATE_OFFSET_NAND0		13
 #define AHB_GATE_OFFSET_MMC(x)		8
@@ -126,6 +203,8 @@
 #define APB1_GATE_TWI_MASK		(0xf << APB1_GATE_TWI_SHIFT)
 
 /* ahb_reset0_cfg fields */
+#define AHB_RESET_OFFSET_MCTL		14
+
 /* On sun9i all sdc-s share their ahb reset, so ignore (x) */
 #define AHB_RESET_OFFSET_MMC(x)		8
 
@@ -137,6 +216,11 @@
 
 
 #ifndef __ASSEMBLY__
+void clock_set_pll1(unsigned int clk);
+void clock_set_pll2(unsigned int clk);
+void clock_set_pll4(unsigned int clk);
+void clock_set_pll6(unsigned int clk);
+void clock_set_pll12(unsigned int clk);
 unsigned int clock_get_pll4_periph0(void);
 #endif
 
diff --git a/arch/arm/include/asm/arch-sunxi/cpu_sun9i.h b/arch/arm/include/asm/arch-sunxi/cpu_sun9i.h
index 04889c5..c775bcc 100644
--- a/arch/arm/include/asm/arch-sunxi/cpu_sun9i.h
+++ b/arch/arm/include/asm/arch-sunxi/cpu_sun9i.h
@@ -23,6 +23,10 @@
 #define SUNXI_NFC_BASE			(REGS_AHB0_BASE + 0x3000)
 #define SUNXI_TSC_BASE			(REGS_AHB0_BASE + 0x4000)
 
+#define SUNXI_GTBUS_BASE		(REGS_AHB0_BASE + 0x9000)
+/* SID address space starts at 0x01c0e000, but e-fuse is at offset 0x200 */
+#define SUNXI_SID_BASE			(REGS_AHB0_BASE + 0xe200)
+
 #define SUNXI_MMC0_BASE			(REGS_AHB0_BASE + 0x0f000)
 #define SUNXI_MMC1_BASE			(REGS_AHB0_BASE + 0x10000)
 #define SUNXI_MMC2_BASE			(REGS_AHB0_BASE + 0x11000)
@@ -38,6 +42,12 @@
 #define SUNXI_ARMA9_GIC_BASE		(REGS_AHB0_BASE + 0x41000)
 #define SUNXI_ARMA9_CPUIF_BASE		(REGS_AHB0_BASE + 0x42000)
 
+#define SUNXI_DRAM_COM_BASE		(REGS_AHB0_BASE + 0x62000)
+#define SUNXI_DRAM_CTL0_BASE		(REGS_AHB0_BASE + 0x63000)
+#define SUNXI_DRAM_CTL1_BASE		(REGS_AHB0_BASE + 0x64000)
+#define SUNXI_DRAM_PHY0_BASE		(REGS_AHB0_BASE + 0x65000)
+#define SUNXI_DRAM_PHY1_BASE		(REGS_AHB0_BASE + 0x66000)
+
 /* AHB1 Module */
 #define SUNXI_DMA_BASE			(REGS_AHB1_BASE + 0x002000)
 #define SUNXI_USBOTG_BASE		(REGS_AHB1_BASE + 0x100000)
diff --git a/arch/arm/include/asm/arch-sunxi/dram.h b/arch/arm/include/asm/arch-sunxi/dram.h
index 675876f..e0be744 100644
--- a/arch/arm/include/asm/arch-sunxi/dram.h
+++ b/arch/arm/include/asm/arch-sunxi/dram.h
@@ -26,6 +26,8 @@
 #include <asm/arch/dram_sun8i_a83t.h>
 #elif defined(CONFIG_MACH_SUN8I_H3)
 #include <asm/arch/dram_sun8i_h3.h>
+#elif defined(CONFIG_MACH_SUN9I)
+#include <asm/arch/dram_sun9i.h>
 #else
 #include <asm/arch/dram_sun4i.h>
 #endif
diff --git a/arch/arm/include/asm/arch-sunxi/dram_sun9i.h b/arch/arm/include/asm/arch-sunxi/dram_sun9i.h
new file mode 100644
index 0000000..22be6bc
--- /dev/null
+++ b/arch/arm/include/asm/arch-sunxi/dram_sun9i.h
@@ -0,0 +1,278 @@
+/*
+ * Sun9i platform dram controller register and constant defines
+ *
+ * (C) Copyright 2007-2015 Allwinner Technology Co.
+ *                         Jerry Wang <wangflord@allwinnertech.com>
+ * (C) Copyright 2016  Theobroma Systems Design und Consulting GmbH
+ *                     Philipp Tomsich <philipp.tomsich@theobroma-systems.com>
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#ifndef _SUNXI_DRAM_SUN9I_H
+#define _SUNXI_DRAM_SUN9I_H
+
+struct sunxi_mctl_com_reg {
+	u32 cr;			/* 0x00 */
+	u32 ccr;		/* 0x04 controller configuration register */
+	u32 dbgcr;		/* 0x08 */
+	u32 dbgcr1;		/* 0x0c */
+	u32 rmcr;		/* 0x10 */
+	u8 res1[0x1c];		/* 0x14 */
+	u32 mmcr;		/* 0x30 */
+	u8 res2[0x3c];		/* 0x34 */
+	u32 mbagcr;		/* 0x70 */
+	u32 mbacr;		/* 0x74 */
+	u8 res3[0x10];		/* 0x78 */
+	u32 maer;		/* 0x88 */
+	u8 res4[0x74];		/* 0x8c */
+	u32 mdfscr;		/* 0x100 */
+	u32 mdfsmer;		/* 0x104 */
+	u32 mdfsmrmr;		/* 0x108 */
+	u32 mdfstr[4];		/* 0x10c */
+	u32 mdfsgcr;		/* 0x11c */
+	u8 res5[0x1c];		/* 0x120 */
+	u32 mdfsivr;		/* 0x13c */
+	u8 res6[0xc];		/* 0x140 */
+	u32 mdfstcr;		/* 0x14c */
+};
+
+
+struct sunxi_mctl_ctl_reg {
+	u32 mstr;		/* 0x00 master register */
+	u32 stat;		/* 0x04 operating mode status register */
+	u8 res1[0x8];		/* 0x08 */
+	u32 mrctrl[2];		/* 0x10 mode register read/write control reg */
+	u32 mstat;		/* 0x18 mode register read/write status reg */
+	u8 res2[0x4];		/* 0x1c */
+	u32 derateen;		/* 0x20 temperature derate enable register */
+	u32 derateint;		/* 0x24 temperature derate interval register */
+	u8 res3[0x8];		/* 0x28 */
+	u32 pwrctl;		/* 0x30 low power control register */
+	u32 pwrtmg;		/* 0x34 low power timing register */
+	u8 res4[0x18];		/* 0x38 */
+	u32 rfshctl0;		/* 0x50 refresh control register 0 */
+	u32 rfshctl1;		/* 0x54 refresh control register 1 */
+	u8 res5[0x8];		/* 0x58 */
+	u32 rfshctl3;		/* 0x60 refresh control register 3 */
+	u32 rfshtmg;		/* 0x64 refresh timing register */
+	u8 res6[0x68];		/* 0x68 */
+	u32 init[6];		/* 0xd0 SDRAM initialisation register */
+	u8 res7[0xc];		/* 0xe8 */
+	u32 rankctl;		/* 0xf4 rank control register */
+	u8 res8[0x8];		/* 0xf8 */
+	u32 dramtmg[9];		/* 0x100 DRAM timing register */
+	u8 res9[0x5c];		/* 0x124 */
+	u32 zqctrl[3];		/* 0x180 ZQ control register */
+	u32 zqstat;		/* 0x18c ZQ status register */
+	u32 dfitmg[2];		/* 0x190 DFI timing register */
+	u32 dfilpcfg;		/* 0x198 DFI low power configuration register */
+	u8 res10[0x4];		/* 0x19c */
+	u32 dfiupd[4];		/* 0x1a0 DFI update register */
+	u32 dfimisc;		/* 0x1b0 DFI miscellaneous control register */
+	u8 res11[0x1c];		/* 0x1b4 */
+	u32 trainctl[3];	/* 0x1d0 */
+	u32 trainstat;	        /* 0x1dc */
+	u8 res12[0x20];		/* 0x1e0 */
+	u32 addrmap[7];	        /* 0x200 address map register */
+	u8 res13[0x24];		/* 0x21c */
+	u32 odtcfg;		/* 0x240 ODT configuration register */
+	u32 odtmap;		/* 0x244 ODT/rank map register */
+	u8 res14[0x8];		/* 0x248 */
+	u32 sched;		/* 0x250 scheduler control register */
+	u8 res15[0x4];		/* 0x254 */
+	u32 perfhpr0;		/* 0x258 high priority read CAM register 0 */
+	u32 perfhpr1;		/* 0x25c high priority read CAM register 1 */
+	u32 perflpr0;		/* 0x260 low priority read CAM register 0 */
+	u32 perflpr1;		/* 0x264 low priority read CAM register 1 */
+	u32 perfwr0;		/* 0x268 write CAM register 0 */
+	u32 perfwr1;		/* 0x26c write CAM register 1 */
+};
+
+/* DDR PHY register block; trailing comments give byte offsets from its base */
+struct sunxi_mctl_phy_reg {
+	u8 res0[0x04];		/* 0x00 revision id ??? */
+	u32 pir;		/* 0x04 PHY initialisation register */
+	u32 pgcr[4];		/* 0x08 PHY general configuration register */
+	u32 pgsr[2];		/* 0x18 PHY general status register */
+	u32 pllcr;		/* 0x20 PLL control register */
+	u32 ptr[5];		/* 0x24 PHY timing register */
+	u32 acmdlr;		/* 0x38 AC master delay line register */
+	u32 aclcdlr;		/* 0x3c AC local calibrated delay line reg */
+	u32 acbdlr[10];		/* 0x40 AC bit delay line register */
+	u32 aciocr[6];		/* 0x68 AC IO configuration register */
+	u32 dxccr;		/* 0x80 DATX8 common configuration register */
+	u32 dsgcr;		/* 0x84 DRAM system general config register */
+	u32 dcr;		/* 0x88 DRAM configuration register */
+	u32 dtpr[4];		/* 0x8c DRAM timing parameters register */
+	u32 mr0;		/* 0x9c mode register 0 */
+	u32 mr1;		/* 0xa0 mode register 1 */
+	u32 mr2;		/* 0xa4 mode register 2 */
+	u32 mr3;		/* 0xa8 mode register 3 */
+	u32 odtcr;		/* 0xac ODT configuration register */
+	u32 dtcr;		/* 0xb0 data training configuration register */
+	u32 dtar[4];		/* 0xb4 data training address register */
+	u32 dtdr[2];		/* 0xc4 data training data register */
+	u32 dtedr[2];		/* 0xcc data training eye data register */
+	u32 rdimmgcr[2];	/* 0xd4 RDIMM general configuration register */
+	u32 rdimmcr[2];		/* 0xdc RDIMM control register */
+	u32 gpr[2];		/* 0xe4 general purpose register */
+	u32 catr[2];		/* 0xec CA training register */
+	u32 dqdsr;		/* 0xf4 DQS drift register */
+	u8 res1[0xc8];		/* 0xf8 */
+	u32 bistrr;		/* 0x1c0 BIST run register */
+	u32 bistwcr;		/* 0x1c4 BIST word count register */
+	u32 bistmskr[3];	/* 0x1c8 BIST mask register */
+	u32 bistlsr;		/* 0x1d4 BIST LFSR seed register */
+	u32 bistar[3];		/* 0x1d8 BIST address register */
+	u32 bistupdr;		/* 0x1e4 BIST user pattern data register */
+	u32 bistgsr;		/* 0x1e8 BIST general status register */
+	u32 bistwer;		/* 0x1ec BIST word error register */
+	u32 bistber[4];		/* 0x1f0 BIST bit error register */
+	u32 bistwcsr;		/* 0x200 BIST word count status register */
+	u32 bistfwr[3];		/* 0x204 BIST fail word register */
+	u8 res2[0x28];		/* 0x210 */
+	u32 iovcr[2];		/* 0x238 IO VREF control register */
+	struct ddrphy_zq {
+		u32 cr;              /* impedance control register */
+		u32 pr;              /* impedance control program register (TODO confirm) */
+		u32 dr;              /* impedance control data register */
+		u32 sr;              /* impedance control status register */
+	} zq[4];                /* 0x240, 0x250, 0x260, 0x270 */
+	struct ddrphy_dx {
+		u32 gcr[4];          /* DATX8 general configuration register */
+		u32 gsr[3];          /* DATX8 general status register */
+		u32 bdlr[7];         /* DATX8 bit delay line register */
+		u32 lcdlr[3];        /* DATX8 local calibrated delay line reg */
+		u32 mdlr;            /* DATX8 master delay line register */
+		u32 gtr;             /* DATX8 general timing register */
+		u8 res[0x34];        /* pads each lane block to 0x80 bytes */
+	} dx[4];                /* 0x280, 0x300, 0x380, 0x400 */
+};
+
+/*
+ * DRAM common (sunxi_mctl_com_reg) register constants.
+ */
+#define MCTL_CR_RANK_MASK		(3 << 0)
+#define MCTL_CR_RANK(x)			(((x) - 1) << 0)
+#define MCTL_CR_BANK_MASK		(3 << 2)
+#define MCTL_CR_BANK(x)			((x) << 2)
+#define MCTL_CR_ROW_MASK		(0xf << 4)
+#define MCTL_CR_ROW(x)			(((x) - 1) << 4)
+#define MCTL_CR_PAGE_SIZE_MASK		(0xf << 8)
+#define MCTL_CR_PAGE_SIZE(x)		((fls(x) - 4) << 8)
+#define MCTL_CR_BUSW_MASK		(3 << 12)
+#define MCTL_CR_BUSW16			(1 << 12)
+#define MCTL_CR_BUSW32			(3 << 12)
+#define MCTL_CR_DRAMTYPE_MASK           (7 << 16)
+#define MCTL_CR_DRAMTYPE_DDR2		(2 << 16)
+#define MCTL_CR_DRAMTYPE_DDR3		(3 << 16)
+#define MCTL_CR_DRAMTYPE_LPDDR2		(6 << 16)
+
+#define MCTL_CR_CHANNEL_MASK		((1 << 22) | (1 << 20) | (1 << 19))
+#define MCTL_CR_CHANNEL_SINGLE          (1 << 22)
+#define MCTL_CR_CHANNEL_DUAL            ((1 << 22) | (1 << 20) | (1 << 19))
+
+#define MCTL_CCR_CH0_CLK_EN		(1 << 15)
+#define MCTL_CCR_CH1_CLK_EN		(1 << 31)
+
+/*
+ * post_cke_x1024 [bits 16..25]: Cycles to wait after driving CKE high
+ * to start the SDRAM initialization sequence (in 1024s of cycles).
+ */
+#define MCTL_INIT0_POST_CKE_x1024(n)    (((n) & 0x0fff) << 16)
+/*
+ * pre_cke_x1024 [bits 0..11] Cycles to wait after reset before driving
+ * CKE high to start the SDRAM initialization (in 1024s of cycles)
+ */
+#define MCTL_INIT0_PRE_CKE_x1024(n)     (((n) & 0x0fff) <<  0)
+#define MCTL_INIT1_DRAM_RSTN_x1024(n)   (((n) & 0xff) << 16)
+#define MCTL_INIT1_FINAL_WAIT_x32(n)    (((n) & 0x3f) <<  8)
+#define MCTL_INIT1_PRE_OCD_x32(n)       (((n) & 0x0f) <<  0)
+#define MCTL_INIT2_IDLE_AFTER_RESET_x32(n)  (((n) & 0xff) << 8)
+#define MCTL_INIT2_MIN_STABLE_CLOCK_x1(n)   (((n) & 0x0f) << 0)
+#define MCTL_INIT3_MR(n)                (((n) & 0xffff) << 16)
+#define MCTL_INIT3_EMR(n)               (((n) & 0xffff) <<  0)
+#define MCTL_INIT4_EMR2(n)              (((n) & 0xffff) << 16)
+#define MCTL_INIT4_EMR3(n)              (((n) & 0xffff) <<  0)
+#define MCTL_INIT5_DEV_ZQINIT_x32(n)        (((n) & 0x00ff) << 16)
+#define MCTL_INIT5_MAX_AUTO_INIT_x1024(n)   (((n) & 0x03ff) <<  0)
+
+#define MCTL_DFIMISC_DFI_INIT_COMPLETE_EN  (1 << 0)
+#define MCTL_DFIUPD0_DIS_AUTO_CTRLUPD      (1 << 31)
+
+#define MCTL_MSTR_DEVICETYPE_DDR3          1
+#define MCTL_MSTR_DEVICETYPE_LPDDR2        4
+#define MCTL_MSTR_DEVICETYPE_LPDDR3        8
+#define MCTL_MSTR_DEVICETYPE(type) \
+	(((type) == DRAM_TYPE_DDR3) ? MCTL_MSTR_DEVICETYPE_DDR3 : \
+		(((type) == DRAM_TYPE_LPDDR2) ? MCTL_MSTR_DEVICETYPE_LPDDR2 : \
+					      MCTL_MSTR_DEVICETYPE_LPDDR3))
+#define MCTL_MSTR_BURSTLENGTH4             (2 << 16)
+#define MCTL_MSTR_BURSTLENGTH8             (4 << 16)
+#define MCTL_MSTR_BURSTLENGTH16            (8 << 16)
+#define MCTL_MSTR_BURSTLENGTH(type) \
+	(((type) == DRAM_TYPE_DDR3) ? MCTL_MSTR_BURSTLENGTH8 : \
+		(((type) == DRAM_TYPE_LPDDR2) ? MCTL_MSTR_BURSTLENGTH4 : \
+					      MCTL_MSTR_BURSTLENGTH8))
+#define MCTL_MSTR_ACTIVERANKS(x)           ((((x) == 2) ? 3 : 1) << 24)
+#define MCTL_MSTR_BUSWIDTH8                (2 << 12)
+#define MCTL_MSTR_BUSWIDTH16               (1 << 12)
+#define MCTL_MSTR_BUSWIDTH32               (0 << 12)
+#define MCTL_MSTR_2TMODE                   (1 << 10)
+
+#define MCTL_RFSHCTL3_DIS_AUTO_REFRESH     (1 << 0)
+
+#define MCTL_ZQCTRL0_TZQCS(x)              ((x) << 0)
+#define MCTL_ZQCTRL0_TZQCL(x)              ((x) << 16)
+#define MCTL_ZQCTRL0_ZQCL_DIS              (1 << 30)
+#define MCTL_ZQCTRL0_ZQCS_DIS              (1 << 31)
+#define MCTL_ZQCTRL1_TZQRESET(x)           ((x) << 20)
+#define MCTL_ZQCTRL1_TZQSI_x1024(x)        ((x) << 0)
+#define MCTL_ZQCTRL2_TZRESET_TRIGGER       (1 << 0)
+
+#define MCTL_PHY_DCR_BYTEMASK              (1 << 10)
+#define MCTL_PHY_DCR_2TMODE                (1 << 28)
+#define MCTL_PHY_DCR_DDR8BNK               (1 << 3)
+#define MCTL_PHY_DRAMMODE_DDR3             3
+#define MCTL_PHY_DRAMMODE_LPDDR2           0
+#define MCTL_PHY_DRAMMODE_LPDDR3           1
+
+#define MCTL_DTCR_DEFAULT                  0x00003007
+#define MCTL_DTCR_RANKEN(n)                ((((n) == 2) ? 3 : 1) << 24)
+
+#define MCTL_PGCR1_ZCKSEL_MASK             (3 << 23)
+#define MCTL_PGCR1_IODDRM_MASK             (3 << 7)
+#define MCTL_PGCR1_IODDRM_DDR3             (1 << 7)
+#define MCTL_PGCR1_IODDRM_DDR3L            (2 << 7)
+#define MCTL_PGCR1_INHVT_EN                (1 << 26)
+
+#define MCTL_PLLGCR_PLL_BYPASS             (1 << 31)
+#define MCTL_PLLGCR_PLL_POWERDOWN          (1 << 29)
+
+#define MCTL_PIR_PLL_BYPASS                (1 << 17)
+#define MCTL_PIR_MASK                      (~(1 << 17))
+#define MCTL_PIR_INIT                      (1 << 0)
+
+#define MCTL_PGSR0_ERRORS                  (0x1ff << 20)
+
+/* Constants for assembling MR0 (macro arguments are evaluated more than once) */
+#define DDR3_MR0_PPD_FAST_EXIT             (1 << 12)
+#define DDR3_MR0_WR(n) \
+	(((n) <= 8) ? (((n) - 4) << 9) : ((((n) >> 1) & 0x7) << 9))
+#define DDR3_MR0_CL(n) \
+	(((((n) - 4) & 0x7) << 4) | ((((n) - 4) & 0x8) >> 2))
+#define DDR3_MR0_BL8                       (0 << 0)
+
+#define DDR3_MR1_RTT120OHM                 ((0 << 9) | (1 << 6) | (0 << 2))
+
+#define DDR3_MR2_TWL(n) \
+	((((n) - 5) & 0x7) << 3)
+
+#define MCTL_NS2CYCLES_CEIL(ns)	(((ns) * (CONFIG_DRAM_CLK / 2) + 999) / 1000)
+
+#define DRAM_TYPE_DDR3		3
+#define DRAM_TYPE_LPDDR2	6
+#define DRAM_TYPE_LPDDR3	7
+
+#endif
diff --git a/arch/arm/include/asm/arch-sunxi/gtbus.h b/arch/arm/include/asm/arch-sunxi/gtbus.h
new file mode 100644
index 0000000..b8308d5
--- /dev/null
+++ b/arch/arm/include/asm/arch-sunxi/gtbus.h
@@ -0,0 +1,21 @@
+/*
+ * GTBUS initialisation
+ *
+ * (C) Copyright 2016 Theobroma Systems Design und Consulting GmbH
+ *                    Philipp Tomsich <philipp.tomsich@theobroma-systems.com>
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#ifndef _SUNXI_GTBUS_H
+#define _SUNXI_GTBUS_H
+
+#if defined(CONFIG_MACH_SUN9I)
+#include <asm/arch/gtbus_sun9i.h>
+#endif
+
+#ifndef __ASSEMBLY__
+void gtbus_init(void);
+#endif
+
+#endif
diff --git a/arch/arm/include/asm/arch-sunxi/gtbus_sun9i.h b/arch/arm/include/asm/arch-sunxi/gtbus_sun9i.h
new file mode 100644
index 0000000..fd50826
--- /dev/null
+++ b/arch/arm/include/asm/arch-sunxi/gtbus_sun9i.h
@@ -0,0 +1,92 @@
+/*
+ * GTBUS initialisation for sun9i
+ *
+ * (C) Copyright 2016 Theobroma Systems Design und Consulting GmbH
+ *                    Philipp Tomsich <philipp.tomsich@theobroma-systems.com>
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#ifndef _SUNXI_GTBUS_SUN9I_H
+#define _SUNXI_GTBUS_SUN9I_H
+
+#include <linux/types.h>
+/* GTBUS register block; trailing comments give byte offsets from its base */
+struct sunxi_gtbus_reg {
+	u32 mst_cfg[36];           /* 0x000 */
+	u8  reserved1[0x70];       /* 0x090 */
+	u32 bw_wdw_cfg;            /* 0x100 */
+	u32 mst_read_prio_cfg[2];  /* 0x104 */
+	u32 lvl2_mst_cfg;          /* 0x10c */
+	u32 sw_clk_on;             /* 0x110 */
+	u32 sw_clk_off;            /* 0x114 */
+	u32 pmu_mst_en;            /* 0x118 */
+	u32 pmu_cfg;               /* 0x11c */
+	u32 pmu_cnt[19];           /* 0x120 */
+	u8  reserved2[0x94];       /* 0x16c (0x94 bytes of padding up to 0x200) */
+	u32 cci400_config[3];      /* 0x200 */
+	u32 cci400_status[2];      /* 0x20c */
+};
+
+/* for register GT_MST_CFG_REG(n) */
+#define GT_ENABLE_REQ           (1<<31) /* clock on */
+#define GT_DISABLE_REQ          (1<<30) /* clock off */
+#define GT_QOS_SHIFT            28
+#define GT_THD1_SHIFT           16
+#define GT_REQN_MAX             0xf /* max no master requests in one cycle */
+#define GT_REQN_SHIFT           12
+#define GT_THD0_SHIFT           0
+
+#define GT_QOS_MAX              0x3
+#define GT_THD_MAX              0xfff
+#define GT_BW_WDW_MAX           0xffff
+
+/* mst_read_prio_cfg */
+#define GT_PRIO_LOW     0
+#define GT_PRIO_HIGH    1
+
+/* GTBUS port ids */
+#define GT_PORT_CPUM1   0
+#define GT_PORT_CPUM2   1
+#define GT_PORT_SATA    2
+#define	GT_PORT_USB3    3
+#define	GT_PORT_FE0     4
+#define	GT_PORT_BE1     5
+#define	GT_PORT_BE2     6
+#define	GT_PORT_IEP0    7
+#define	GT_PORT_FE1     8
+#define	GT_PORT_BE0     9
+#define	GT_PORT_FE2     10
+#define	GT_PORT_IEP1    11
+#define	GT_PORT_VED     12
+#define	GT_PORT_VEE     13
+#define	GT_PORT_FD      14
+#define	GT_PORT_CSI     15
+#define	GT_PORT_MP      16
+#define	GT_PORT_HSI     17
+#define	GT_PORT_SS      18
+#define	GT_PORT_TS      19
+#define	GT_PORT_DMA     20
+#define	GT_PORT_NDFC0   21
+#define	GT_PORT_NDFC1   22
+#define	GT_PORT_CPUS    23
+#define	GT_PORT_TH      24
+#define	GT_PORT_GMAC    25
+#define	GT_PORT_USB0    26
+#define	GT_PORT_MSTG0   27
+#define	GT_PORT_MSTG1   28
+#define	GT_PORT_MSTG2   29
+#define	GT_PORT_MSTG3   30
+#define	GT_PORT_USB1    31
+#define	GT_PORT_GPU0    32
+#define	GT_PORT_GPU1    33
+#define	GT_PORT_USB2    34
+#define	GT_PORT_CPUM0   35
+
+#define GP_MST_CFG_DEFAULT \
+	((GT_QOS_MAX << GT_QOS_SHIFT)   | \
+	 (GT_THD_MAX << GT_THD1_SHIFT)  | \
+	 (GT_REQN_MAX << GT_REQN_SHIFT) | \
+	 (GT_THD_MAX << GT_THD0_SHIFT))
+
+#endif
diff --git a/arch/arm/mach-sunxi/Makefile b/arch/arm/mach-sunxi/Makefile
index 25367cf..e73114e 100644
--- a/arch/arm/mach-sunxi/Makefile
+++ b/arch/arm/mach-sunxi/Makefile
@@ -32,7 +32,7 @@
 else
 obj-$(CONFIG_MACH_SUN8I)	+= clock_sun6i.o
 endif
-obj-$(CONFIG_MACH_SUN9I)	+= clock_sun9i.o
+obj-$(CONFIG_MACH_SUN9I)	+= clock_sun9i.o gtbus_sun9i.o
 
 obj-$(CONFIG_AXP152_POWER)	+= pmic_bus.o
 obj-$(CONFIG_AXP209_POWER)	+= pmic_bus.o
@@ -49,4 +49,5 @@
 obj-$(CONFIG_MACH_SUN8I_A33)	+= dram_sun8i_a33.o
 obj-$(CONFIG_MACH_SUN8I_A83T)	+= dram_sun8i_a83t.o
 obj-$(CONFIG_MACH_SUN8I_H3)	+= dram_sun8i_h3.o
+obj-$(CONFIG_MACH_SUN9I)	+= dram_sun9i.o
 endif
diff --git a/arch/arm/mach-sunxi/board.c b/arch/arm/mach-sunxi/board.c
index 7713813..0f8ead9 100644
--- a/arch/arm/mach-sunxi/board.c
+++ b/arch/arm/mach-sunxi/board.c
@@ -182,7 +182,8 @@
 
 #if defined CONFIG_MACH_SUN6I || \
     defined CONFIG_MACH_SUN7I || \
-    defined CONFIG_MACH_SUN8I
+    defined CONFIG_MACH_SUN8I || \
+    defined CONFIG_MACH_SUN9I
 	/* Enable SMP mode for CPU0, by setting bit 6 of Auxiliary Ctl reg */
 	asm volatile(
 		"mrc p15, 0, r0, c1, c0, 1\n"
diff --git a/arch/arm/mach-sunxi/clock.c b/arch/arm/mach-sunxi/clock.c
index 0b8fc94..e6f53f9 100644
--- a/arch/arm/mach-sunxi/clock.c
+++ b/arch/arm/mach-sunxi/clock.c
@@ -13,16 +13,22 @@
 #include <asm/arch/clock.h>
 #include <asm/arch/gpio.h>
 #include <asm/arch/prcm.h>
+#include <asm/arch/gtbus.h>
 #include <asm/arch/sys_proto.h>
 
 __weak void clock_init_sec(void)
 {
 }
 
+__weak void gtbus_init(void)
+{
+}
+
 int clock_init(void)
 {
 #ifdef CONFIG_SPL_BUILD
 	clock_init_safe();
+	gtbus_init();
 #endif
 	clock_init_uart();
 	clock_init_sec();
diff --git a/arch/arm/mach-sunxi/clock_sun9i.c b/arch/arm/mach-sunxi/clock_sun9i.c
index 180634c..69930cf 100644
--- a/arch/arm/mach-sunxi/clock_sun9i.c
+++ b/arch/arm/mach-sunxi/clock_sun9i.c
@@ -1,8 +1,12 @@
+
 /*
  * sun9i specific clock code
  *
  * (C) Copyright 2015 Hans de Goede <hdegoede@redhat.com>
  *
+ * (C) Copyright 2016 Theobroma Systems Design und Consulting GmbH
+ *                    Philipp Tomsich <philipp.tomsich@theobroma-systems.com>
+ *
  * SPDX-License-Identifier:	GPL-2.0+
  */
 
@@ -12,6 +16,57 @@
 #include <asm/arch/prcm.h>
 #include <asm/arch/sys_proto.h>
 
+
+#ifdef CONFIG_SPL_BUILD
+/* SPL only: program the PLLs, bus clock sources/dividers and the DMA clock */
+void clock_init_safe(void)
+{
+	struct sunxi_ccm_reg * const ccm =
+		(struct sunxi_ccm_reg *)SUNXI_CCM_BASE;
+
+	/* Set up PLL12 (peripheral 1) */
+	clock_set_pll12(1200000000);
+
+	/* Set up PLL1 (cluster 0) and PLL2 (cluster 1) */
+	clock_set_pll1(408000000);
+	clock_set_pll2(408000000);
+
+	/* Set up PLL4 (peripheral 0) */
+	clock_set_pll4(960000000);
+
+	/* Set up dividers for AXI0 and APB0 on cluster 0: PLL1 / 2 = 204MHz */
+	writel(C0_CFG_AXI0_CLK_DIV_RATIO(2) |
+	       C0_CFG_APB0_CLK_DIV_RATIO(2), &ccm->c0_cfg);
+
+	/* AHB0: 120 MHz (PLL_PERIPH0 / 8) */
+	writel(AHBx_SRC_PLL_PERIPH0 | AHBx_CLK_DIV_RATIO(8),
+	       &ccm->ahb0_cfg);
+	/* AHB1: 240 MHz (PLL_PERIPH0 / 4) */
+	writel(AHBx_SRC_PLL_PERIPH0 | AHBx_CLK_DIV_RATIO(4),
+	       &ccm->ahb1_cfg);
+	/* AHB2: 120 MHz (PLL_PERIPH0 / 8) */
+	writel(AHBx_SRC_PLL_PERIPH0 | AHBx_CLK_DIV_RATIO(8),
+	       &ccm->ahb2_cfg);
+	/* APB0: 120 MHz (PLL_PERIPH0 / 8) */
+	writel(APB0_SRC_PLL_PERIPH0 | APB0_CLK_DIV_RATIO(8),
+	       &ccm->apb0_cfg);
+
+	/* GTBUS: 400MHz (PERIPH1 div 3) */
+	writel(GTBUS_SRC_PLL_PERIPH1 | GTBUS_CLK_DIV_RATIO(3),
+	       &ccm->gtbus_cfg);
+	/* CCI400: 480MHz (PERIPH0 div 2) */
+	writel(CCI400_SRC_PLL_PERIPH0 | CCI400_CLK_DIV_RATIO(2),
+	       &ccm->cci400_cfg);
+
+	/* Deassert DMA reset and open clock gating for DMA (both on AHB1) */
+	setbits_le32(&ccm->ahb_reset1_cfg, (1 << 24));
+	setbits_le32(&ccm->ahb_gate1, (1 << 24));
+
+	/* set enable-bit in TSTAMP_CTRL_REG */
+	writel(1, 0x01720000);
+}
+#endif
+
 void clock_init_uart(void)
 {
 	struct sunxi_ccm_reg *const ccm =
@@ -25,10 +80,97 @@
 	setbits_le32(&ccm->apb1_reset_cfg,
 		     1 << (APB1_RESET_UART_SHIFT +
 			   CONFIG_CONS_INDEX - 1));
+}
+
+#ifdef CONFIG_SPL_BUILD
+void clock_set_pll1(unsigned int clk)
+{
+	struct sunxi_ccm_reg * const ccm =
+		(struct sunxi_ccm_reg *)SUNXI_CCM_BASE;
+	const int p = 0;
+	/* clk is in Hz: the N field is written as clk / 24000000, P as 0 */
+	/* Switch cluster 0 to 24MHz clock while changing PLL1 */
+	clrsetbits_le32(&ccm->cpu_clk_source, C0_CPUX_CLK_SRC_MASK,
+			C0_CPUX_CLK_SRC_OSC24M);
+
+	writel(CCM_PLL1_CTRL_EN | CCM_PLL1_CTRL_P(p) |
+	       CCM_PLL1_CLOCK_TIME_2 |
+	       CCM_PLL1_CTRL_N(clk / 24000000),
+	       &ccm->pll1_c0_cfg);
+	/*
+	 * Don't bother with the stable-time registers, as it doesn't
+	 * wait until the PLL is stable.  Note, that even Allwinner
+	 * just uses a delay loop (or rather the AVS timer) for this
+	 * instead of the PLL_STABLE_STATUS register.
+	 */
+	sdelay(2000);
+
+	/* Switch cluster 0 back to PLL1 */
+	clrsetbits_le32(&ccm->cpu_clk_source, C0_CPUX_CLK_SRC_MASK,
+			C0_CPUX_CLK_SRC_PLL1);
+}
+/* Reprogram PLL2 (cluster 1) to clk Hz; same sequence as clock_set_pll1() */
+void clock_set_pll2(unsigned int clk)
+{
+	struct sunxi_ccm_reg * const ccm =
+		(struct sunxi_ccm_reg *)SUNXI_CCM_BASE;
+	const int p = 0;
+
+	/* Switch cluster 1 to 24MHz clock while changing PLL2 */
+	clrsetbits_le32(&ccm->cpu_clk_source, C1_CPUX_CLK_SRC_MASK,
+			C1_CPUX_CLK_SRC_OSC24M);
+
+	writel(CCM_PLL2_CTRL_EN | CCM_PLL2_CTRL_P(p) |
+	       CCM_PLL2_CLOCK_TIME_2 | CCM_PLL2_CTRL_N(clk / 24000000),
+	       &ccm->pll2_c1_cfg);
+	/* fixed delay instead of polling a status bit, as in clock_set_pll1() */
+	sdelay(2000);
+
+	/* Switch cluster 1 back to PLL2 */
+	clrsetbits_le32(&ccm->cpu_clk_source, C1_CPUX_CLK_SRC_MASK,
+			C1_CPUX_CLK_SRC_PLL2);
+}
+/* Program PLL6 (DDR) for clk Hz, then poll its bit in pll_stable_status */
+void clock_set_pll6(unsigned int clk)
+{
+	struct sunxi_ccm_reg * const ccm =
+		(struct sunxi_ccm_reg *)SUNXI_CCM_BASE;
+	const int p = 0;
+
+	writel(CCM_PLL6_CTRL_EN | CCM_PLL6_CFG_UPDATE | CCM_PLL6_CTRL_P(p)
+	       | CCM_PLL6_CTRL_N(clk / 24000000),
+	       &ccm->pll6_ddr_cfg);
+	do { } while (!(readl(&ccm->pll_stable_status) & PLL_DDR_STATUS));
+	/* NOTE: the poll above has no timeout; a fixed delay follows it */
+	sdelay(2000);
+}
+/* Enable PLL12 (peripheral 1) at clk Hz; does nothing if already enabled */
+void clock_set_pll12(unsigned int clk)
+{
+	struct sunxi_ccm_reg * const ccm =
+		(struct sunxi_ccm_reg *)SUNXI_CCM_BASE;
+
+	if (readl(&ccm->pll12_periph1_cfg) & CCM_PLL12_CTRL_EN)
+		return;
+
+	writel(CCM_PLL12_CTRL_EN | CCM_PLL12_CTRL_N(clk / 24000000),
+	       &ccm->pll12_periph1_cfg);
+
+	sdelay(2000);
+}
+
+/* Enable PLL4 (peripheral 0) with N = clk / 24000000, then delay */
+void clock_set_pll4(unsigned int clk)
+{
+	struct sunxi_ccm_reg * const ccm =
+		(struct sunxi_ccm_reg *)SUNXI_CCM_BASE;
+
+	writel(CCM_PLL4_CTRL_EN | CCM_PLL4_CTRL_N(clk / 24000000),
+	       &ccm->pll4_periph0_cfg);
 
-	/* Dup with clock_init_safe(), drop once sun9i SPL support lands */
-	writel(PLL4_CFG_DEFAULT, &ccm->pll4_periph0_cfg);
+	sdelay(2000);
 }
+#endif
 
 int clock_twi_onoff(int port, int state)
 {
diff --git a/arch/arm/mach-sunxi/dram_sun9i.c b/arch/arm/mach-sunxi/dram_sun9i.c
new file mode 100644
index 0000000..8c681f3
--- /dev/null
+++ b/arch/arm/mach-sunxi/dram_sun9i.c
@@ -0,0 +1,961 @@
+/*
+ * sun9i dram controller initialisation
+ *
+ * (C) Copyright 2007-2015
+ * Allwinner Technology Co., Ltd. <www.allwinnertech.com>
+ * Jerry Wang <wangflord@allwinnertech.com>
+ *
+ * (C) Copyright 2016 Theobroma Systems Design und Consulting GmbH
+ *                    Philipp Tomsich <philipp.tomsich@theobroma-systems.com>
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#include <common.h>
+#include <dm.h>
+#include <errno.h>
+#include <ram.h>
+#include <asm/io.h>
+#include <asm/arch/clock.h>
+#include <asm/arch/dram.h>
+#include <asm/arch/sys_proto.h>
+
+DECLARE_GLOBAL_DATA_PTR;
+
+#define DRAM_CLK (CONFIG_DRAM_CLK * 1000000)
+
+/*
+ * The following amounts to an extensive rewrite of the code received from
+ * Allwinner as part of the open-source bootloader release (refer to
+ * https://github.com/allwinner-zh/bootloader.git) and augments the upstream
+ * sources (which act as the primary reference point for the inner workings
+ * of the 'underdocumented' DRAM controller in the A80) using the following
+ * documentation for other memory controllers based on the (Synopsys)
+ * Designware IP (DDR memory protocol controller and DDR PHY)
+ *   * TI Keystone II Architecture: DDR3 Memory Controller, User's Guide
+ *     Document 'SPRUHN7C', Oct 2013 (revised March 2015)
+ *   * Xilinx Zynq UltraScale+ MPSoC Register Reference
+ *     document ug1087 (v1.0)
+ * Note that the Zynq-documentation provides a very close match for the DDR
+ * memory protocol controller (and provides a very good guide to the rounding
+ * rules for various timings), whereas the TI Keystone II document should be
+ * referred to for DDR PHY specifics only.
+ *
+ * The DRAM controller in the A80 runs at half the frequency of the DDR PHY
+ * (i.e. the rules for MEMC_FREQ_RATIO=2 from the Zynq-documentation apply).
+ *
+ * Known limitations
+ * =================
+ * In the current state, the following features are not fully supported and
+ * a number of simplifying assumptions have been made:
+ *   1) Only DDR3 support is implemented, as our test platform (the A80-Q7
+ *      module) is designed to accommodate DDR3/DDR3L.
+ *   2) Only 2T-mode has been implemented and tested.
+ *   3) The controller supports two different clocking strategies (PLL6 can
+ *      either be 2*CK or CK/2)... we only support the 2*CK clock at this
+ *      time and haven't verified whether the alternative clocking strategy
+ *      works.  If you are interested in porting this over/testing this,
+ *      please refer to cases where bit 0 of 'dram_tpr8' is tested in the
+ *      original code from Allwinner.
+ *   4) Support for 2 ranks per controller is not implemented (as we don't
+ *      have the hardware to test it).
+ *
+ * Future directions
+ * =================
+ * The driver should be driven from a device-tree based configuration that
+ * can dynamically provide the necessary timing parameters (i.e. target
+ * frequency and speed-bin information)---the data structures used in the
+ * calculation of the timing parameters are already designed to capture
+ * similar information as the device tree would provide.
+ *
+ * To enable a device-tree based configuration of the sun9i platform, we
+ * will need to enable CONFIG_TPL and bootstrap in 3 stages: initially
+ * into SRAM A1 (40KB) and next into SRAM A2 (160KB)---which would be the
+ * stage to initialise the platform via the device-tree---before having
+ * the full U-Boot run from DDR.
+ */
+
+/*
+ * A number of DDR3 timings are given as "the greater of a fixed number of
+ * clock cycles (CK) or nanoseconds".  We express these using a structure
+ * that holds a cycle count and a duration in picoseconds (so we can model
+ * sub-ns timings, such as 7.5ns, without losing precision or resorting to
+ * rounding up early).
+ */
+struct dram_sun9i_timing {
+	u32 ck;		/* lower bound expressed in clock cycles (nCK) */
+	u32 ps;		/* lower bound expressed in picoseconds */
+};
+
+/*
+ * One row of a speed bin's CL/CWL table: the CL and CWL values to use when
+ * the clock period tCK satisfies tCKmin <= tCK < tCKmax.
+ */
+struct dram_sun9i_cl_cwl_timing {
+	u32 CL;
+	u32 CWL;
+	u32 tCKmin;  /* in ps */
+	u32 tCKmax;  /* in ps */
+};
+
+/*
+ * DRAM geometry and timing description consumed by mctl_com_init() and
+ * mctl_channel_init().
+ */
+struct dram_sun9i_para {
+	u32 dram_type;	/* compared against DRAM_TYPE_DDR3 */
+
+	u8 bus_width;	/* 32 selects MCTL_CR_BUSW32, anything else BUSW16 */
+	u8 chan;	/* 2 selects dual-channel, anything else single */
+	u8 rank;
+	u8 rows;
+	u16 page_size;
+
+	/* Timing information for each speed-bin */
+	struct dram_sun9i_cl_cwl_timing *cl_cwl_table;
+	u32 cl_cwl_numentries;
+
+	/*
+	 * For the timings, we try to keep the order and grouping used in
+	 * JEDEC Standard No. 79-3F
+	 */
+
+	/* timings */
+	u32 tREFI; /* in ns */
+	u32 tRFC;  /* in ns */
+
+	u32 tRAS;  /* in ps */
+
+	/* command and address timing */
+	u32 tDLLK; /* in nCK */
+	struct dram_sun9i_timing tRTP;
+	struct dram_sun9i_timing tWTR;
+	u32 tWR;   /* in ns (converted to cycles with NS2CYCLES_FLOOR) */
+	u32 tMRD;  /* in nCK */
+	struct dram_sun9i_timing tMOD;
+	u32 tRCD;  /* in ps */
+	u32 tRP;   /* in ps */
+	u32 tRC;   /* in ps */
+	u32 tCCD;  /* in nCK */
+	struct dram_sun9i_timing tRRD;
+	u32 tFAW;  /* in ps */
+
+	/* calibration timing */
+	/* struct dram_sun9i_timing tZQinit; */
+	struct dram_sun9i_timing tZQoper;
+	struct dram_sun9i_timing tZQCS;
+
+	/* reset timing */
+	/* struct dram_sun9i_timing tXPR; */
+
+	/* self-refresh timings */
+	struct dram_sun9i_timing tXS;
+	u32 tXSDLL; /* in nCK */
+	/* struct dram_sun9i_timing tCKESR; */
+	struct dram_sun9i_timing tCKSRE;
+	struct dram_sun9i_timing tCKSRX;
+
+	/* power-down timings */
+	struct dram_sun9i_timing tXP;
+	struct dram_sun9i_timing tXPDLL;
+	struct dram_sun9i_timing tCKE;
+
+	/* write leveling timings */
+	u32 tWLMRD;    /* min, in nCK */
+	/* u32 tWLDQSEN;  min, in nCK */
+	u32 tWLO;      /* max, in ps (converted with PS2CYCLES_FLOOR) */
+	/* u32 tWLOE;     max, in ns */
+
+	/* u32 tCKDPX;    in nCK */
+	/* u32 tCKCSX;    in nCK */
+};
+
+static void mctl_sys_init(void);
+
+/*
+ * Field helpers and values for the scheduler/performance registers written
+ * by mctl_ctl_sched_init().  Arguments are parenthesised and masked as
+ * unsigned so that expression arguments and field values with the top bit
+ * set are shifted without signed overflow.
+ */
+#define SCHED_RDWR_IDLE_GAP(n)            (((n) & 0xffU) << 24)
+#define SCHED_GO2CRITICAL_HYSTERESIS(n)   (((n) & 0xffU) << 16)
+#define SCHED_LPR_NUM_ENTRIES(n)          (((n) & 0xffU) <<  8)
+#define SCHED_PAGECLOSE                   (1 << 2)
+#define SCHED_PREFER_WRITE                (1 << 1)
+#define SCHED_FORCE_LOW_PRI_N             (1 << 0)
+
+#define SCHED_CONFIG		(SCHED_RDWR_IDLE_GAP(0xf) | \
+				 SCHED_GO2CRITICAL_HYSTERESIS(0x80) | \
+				 SCHED_LPR_NUM_ENTRIES(0x20) | \
+				 SCHED_FORCE_LOW_PRI_N)
+#define PERFHPR0_CONFIG                   0x0000001f
+#define PERFHPR1_CONFIG                   0x1f00001f
+#define PERFLPR0_CONFIG                   0x000000ff
+#define PERFLPR1_CONFIG                   0x0f0000ff
+#define PERFWR0_CONFIG                    0x000000ff
+#define PERFWR1_CONFIG                    0x0f0001ff
+
+/*
+ * Write the fixed scheduler (SCHED) and performance (PERF*) settings into
+ * the DRAM controller whose register block starts at @base.
+ *
+ * This has to happen before the global clock is enabled.
+ */
+static void mctl_ctl_sched_init(unsigned long base)
+{
+	struct sunxi_mctl_ctl_reg * const ctl =
+		(struct sunxi_mctl_ctl_reg *)base;
+
+	/* transaction scheduler */
+	writel(SCHED_CONFIG, &ctl->sched);
+
+	/* high-priority read queue */
+	writel(PERFHPR0_CONFIG, &ctl->perfhpr0);
+	writel(PERFHPR1_CONFIG, &ctl->perfhpr1);
+
+	/* low-priority read queue */
+	writel(PERFLPR0_CONFIG, &ctl->perflpr0);
+	writel(PERFLPR1_CONFIG, &ctl->perflpr1);
+
+	/* write queue */
+	writel(PERFWR0_CONFIG, &ctl->perfwr0);
+	writel(PERFWR1_CONFIG, &ctl->perfwr1);
+}
+
+/*
+ * System-level preparation for the DRAM controllers:
+ *   - set PLL6 to 2 * DRAM_CLK,
+ *   - pulse the MCTL reset and clock gate,
+ *   - program the scheduler settings of both controllers,
+ *   - select and enable the DRAM clock in the CCM,
+ *   - enable the per-channel clocks in the common control register (CCR).
+ *
+ * Only the "PLL6 = 2 * CK" clocking strategy is implemented; the original
+ * vendor code for the alternative is retained in comments for reference.
+ */
+static void mctl_sys_init(void)
+{
+	struct sunxi_ccm_reg * const ccm =
+		(struct sunxi_ccm_reg *)SUNXI_CCM_BASE;
+	struct sunxi_mctl_com_reg * const mctl_com =
+		(struct sunxi_mctl_com_reg *)SUNXI_DRAM_COM_BASE;
+
+	debug("Setting PLL6 to %d\n", DRAM_CLK * 2);
+	clock_set_pll6(DRAM_CLK * 2);
+
+	/* Original dram init code which may come in handy later
+	********************************************************
+	clock_set_pll6(use_2channelPLL ? (DRAM_CLK * 2) :
+					 (DRAM_CLK / 2), false);
+
+	if ((para->dram_clk <= 400)|((para->dram_tpr8 & 0x1)==0)) {
+		 * PLL6 should be 2*CK *
+		 * ccm_setup_pll6_ddr_clk(PLL6_DDR_CLK); *
+		ccm_setup_pll6_ddr_clk((1000000 * (para->dram_clk) * 2), 0);
+	} else {
+		 * PLL6 should be CK/2 *
+		ccm_setup_pll6_ddr_clk((1000000 * (para->dram_clk) / 2), 1);
+	}
+
+	if (para->dram_tpr13 & (0xf<<18)) {
+		 *
+		 * bit21:bit18=0001:pll swing 0.4
+		 * bit21:bit18=0010:pll swing 0.3
+		 * bit21:bit18=0100:pll swing 0.2
+		 * bit21:bit18=1000:pll swing 0.1
+		 *
+		dram_dbg("DRAM fre extend open !\n");
+		reg_val=mctl_read_w(CCM_PLL6_DDR_REG);
+		reg_val&=(0x1<<16);
+		reg_val=reg_val>>16;
+
+		if(para->dram_tpr13 & (0x1<<18))
+		{
+			mctl_write_w(CCM_PLL_BASE + 0x114,
+				(0x3333U|(0x3<<17)|(reg_val<<19)|(0x120U<<20)|
+				(0x2U<<29)|(0x1U<<31)));
+		}
+		else if(para->dram_tpr13 & (0x1<<19))
+		{
+			mctl_write_w(CCM_PLL_BASE + 0x114,
+				(0x6666U|(0x3U<<17)|(reg_val<<19)|(0xD8U<<20)|
+				(0x2U<<29)|(0x1U<<31)));
+		}
+		else if(para->dram_tpr13 & (0x1<<20))
+		{
+			mctl_write_w(CCM_PLL_BASE + 0x114,
+				(0x9999U|(0x3U<<17)|(reg_val<<19)|(0x90U<<20)|
+				(0x2U<<29)|(0x1U<<31)));
+		}
+		else if(para->dram_tpr13 & (0x1<<21))
+		{
+			mctl_write_w(CCM_PLL_BASE + 0x114,
+				(0xccccU|(0x3U<<17)|(reg_val<<19)|(0x48U<<20)|
+				(0x2U<<29)|(0x1U<<31)));
+		}
+
+		//frequency extend open
+		reg_val = mctl_read_w(CCM_PLL6_DDR_REG);
+		reg_val |= ((0x1<<24)|(0x1<<30));
+		mctl_write_w(CCM_PLL6_DDR_REG, reg_val);
+
+
+		while(mctl_read_w(CCM_PLL6_DDR_REG) & (0x1<<30));
+	}
+
+	aw_delay(0x20000);	//make some delay
+	********************************************************
+	*/
+
+	/* assert mctl reset */
+	clrbits_le32(&ccm->ahb_reset0_cfg, 1 << AHB_RESET_OFFSET_MCTL);
+	/* stop mctl clock */
+	clrbits_le32(&ccm->ahb_gate0, 1 << AHB_GATE_OFFSET_MCTL);
+
+	sdelay(2000);
+
+	/* deassert mctl reset */
+	setbits_le32(&ccm->ahb_reset0_cfg, 1 << AHB_RESET_OFFSET_MCTL);
+	/* enable mctl clock */
+	setbits_le32(&ccm->ahb_gate0, 1 << AHB_GATE_OFFSET_MCTL);
+
+	/* set up the transactions scheduling before enabling the global clk */
+	mctl_ctl_sched_init(SUNXI_DRAM_CTL0_BASE);
+	mctl_ctl_sched_init(SUNXI_DRAM_CTL1_BASE);
+	sdelay(1000);
+
+	debug("2\n");
+
+	/*
+	 * (3 << 12): PLL_DDR
+	 * Bit 16 is written as 1 and polled until it reads back as 0.
+	 */
+	writel((3 << 12) | (1 << 16), &ccm->dram_clk_cfg);
+	do {
+		debug("Waiting for DRAM_CLK_CFG\n");
+		sdelay(10000);
+	} while (readl(&ccm->dram_clk_cfg) & (1 << 16));
+	/* Unsigned constant: shifting a signed 1 into bit 31 overflows int */
+	setbits_le32(&ccm->dram_clk_cfg, (1U << 31));
+
+	/* TODO: we only support the common case ... i.e. 2*CK */
+	setbits_le32(&mctl_com->ccr, (1 << 14) | (1 << 30));
+	writel(2, &mctl_com->rmcr); /* controller clock is PLL6/4 */
+
+	sdelay(2000);
+
+	/* Original dram init code which may come in handy later
+	********************************************************
+	if ((para->dram_clk <= 400) | ((para->dram_tpr8 & 0x1) == 0)) {
+		 * PLL6 should be 2*CK *
+		 * gating 2 channel pll *
+		reg_val = mctl_read_w(MC_CCR);
+		reg_val |= ((0x1 << 14) | (0x1U << 30));
+		mctl_write_w(MC_CCR, reg_val);
+		mctl_write_w(MC_RMCR, 0x2); * controller clock use pll6/4 *
+	} else {
+		 * enable 2 channel pll *
+		reg_val = mctl_read_w(MC_CCR);
+		reg_val &= ~((0x1 << 14) | (0x1U << 30));
+		mctl_write_w(MC_CCR, reg_val);
+		mctl_write_w(MC_RMCR, 0x0); * controller clock use pll6 *
+	}
+
+	reg_val = mctl_read_w(MC_CCR);
+	reg_val &= ~((0x1<<15)|(0x1U<<31));
+	mctl_write_w(MC_CCR, reg_val);
+	aw_delay(20);
+	//aw_delay(0x10);
+	********************************************************
+	*/
+
+	/* Cycle the per-channel clock enables: both off, then both on */
+	clrbits_le32(&mctl_com->ccr, MCTL_CCR_CH0_CLK_EN | MCTL_CCR_CH1_CLK_EN);
+	sdelay(1000);
+
+	setbits_le32(&mctl_com->ccr, MCTL_CCR_CH0_CLK_EN);
+	/* TODO if (para->chan == 2) */
+	setbits_le32(&mctl_com->ccr, MCTL_CCR_CH1_CLK_EN);
+}
+
+/*
+ * Compose the common control register (CR) from the geometry in @para
+ * (channel count, rows, bus width, page size, rank) and write it out.
+ *
+ * The DRAM type is hard-wired to DDR3 and the bank field to
+ * MCTL_CR_BANK(1).
+ */
+static void mctl_com_init(struct dram_sun9i_para *para)
+{
+	struct sunxi_mctl_com_reg * const mctl_com =
+		(struct sunxi_mctl_com_reg *)SUNXI_DRAM_COM_BASE;
+	u32 cr;
+
+	/* TODO: hard-wired for DDR3 now */
+	cr = MCTL_CR_DRAMTYPE_DDR3 | MCTL_CR_BANK(1);
+
+	if (para->chan == 2)
+		cr |= MCTL_CR_CHANNEL_DUAL;
+	else
+		cr |= MCTL_CR_CHANNEL_SINGLE;
+
+	if (para->bus_width == 32)
+		cr |= MCTL_CR_BUSW32;
+	else
+		cr |= MCTL_CR_BUSW16;
+
+	cr |= MCTL_CR_ROW(para->rows);
+	cr |= MCTL_CR_PAGE_SIZE(para->page_size);
+	cr |= MCTL_CR_RANK(para->rank);
+
+	writel(cr, &mctl_com->cr);
+
+	debug("CR: %d\n", readl(&mctl_com->cr));
+}
+
+/*
+ * Initialise one DRAM channel: convert the timings in @para to clock cycles,
+ * program the protocol controller and the PHY, trigger the PHY
+ * initialisation and wait for the controller to report normal operation.
+ *
+ * @ch_index: 0 selects the CTL0/PHY0 register blocks, any other value
+ *            selects CTL1/PHY1
+ * @para:     DRAM geometry and timing parameters
+ *
+ * Returns 1 on success.  Returns 0 if no CL/CWL table entry matches the
+ * configured clock period, or if the PHY reports an error in PGSR0.
+ * The polling loops have no timeout.
+ */
+static u32 mctl_channel_init(u32 ch_index, struct dram_sun9i_para *para)
+{
+	struct sunxi_mctl_ctl_reg *mctl_ctl;
+	struct sunxi_mctl_phy_reg *mctl_phy;
+
+	u32 CL = 0;
+	u32 CWL = 0;
+	u16 mr[4] = { 0, };
+
+	/* CONFIG_DRAM_CLK is in MHz, so ps * MHz / 10^6 yields cycles */
+#define PS2CYCLES_FLOOR(n)    (((n) * CONFIG_DRAM_CLK) / 1000000)
+#define PS2CYCLES_ROUNDUP(n)  (((n) * CONFIG_DRAM_CLK + 999999) / 1000000)
+#define NS2CYCLES_FLOOR(n)    (((n) * CONFIG_DRAM_CLK) / 1000)
+#define NS2CYCLES_ROUNDUP(n)  (((n) * CONFIG_DRAM_CLK + 999) / 1000)
+#define MAX(a, b)             ((a) > (b) ? (a) : (b))
+
+	/*
+	 * Convert the values to cycle counts (nCK) from what is provided
+	 * by the definition of each speed bin.
+	 */
+	const u32 tREFI = NS2CYCLES_FLOOR(para->tREFI);
+	const u32 tRFC  = NS2CYCLES_ROUNDUP(para->tRFC);
+	const u32 tRCD  = PS2CYCLES_ROUNDUP(para->tRCD);
+	const u32 tRP   = PS2CYCLES_ROUNDUP(para->tRP);
+	const u32 tRC   = PS2CYCLES_ROUNDUP(para->tRC);
+	const u32 tRAS  = PS2CYCLES_ROUNDUP(para->tRAS);
+
+	/* command and address timing */
+	const u32 tDLLK = para->tDLLK;
+	const u32 tRTP  = MAX(para->tRTP.ck, PS2CYCLES_ROUNDUP(para->tRTP.ps));
+	const u32 tWTR  = MAX(para->tWTR.ck, PS2CYCLES_ROUNDUP(para->tWTR.ps));
+	const u32 tWR   = NS2CYCLES_FLOOR(para->tWR);
+	const u32 tMRD  = para->tMRD;
+	const u32 tMOD  = MAX(para->tMOD.ck, PS2CYCLES_ROUNDUP(para->tMOD.ps));
+	const u32 tCCD  = para->tCCD;
+	const u32 tRRD  = MAX(para->tRRD.ck, PS2CYCLES_ROUNDUP(para->tRRD.ps));
+	const u32 tFAW  = PS2CYCLES_ROUNDUP(para->tFAW);
+
+	/* calibration timings */
+	/* const u32 tZQinit = MAX(para->tZQinit.ck,
+				PS2CYCLES_ROUNDUP(para->tZQinit.ps)); */
+	const u32 tZQoper = MAX(para->tZQoper.ck,
+				PS2CYCLES_ROUNDUP(para->tZQoper.ps));
+	const u32 tZQCS   = MAX(para->tZQCS.ck,
+				PS2CYCLES_ROUNDUP(para->tZQCS.ps));
+
+	/* reset timing */
+	/* const u32 tXPR  = MAX(para->tXPR.ck,
+				PS2CYCLES_ROUNDUP(para->tXPR.ps)); */
+
+	/* power-down timings */
+	const u32 tXP    = MAX(para->tXP.ck, PS2CYCLES_ROUNDUP(para->tXP.ps));
+	const u32 tXPDLL = MAX(para->tXPDLL.ck,
+			       PS2CYCLES_ROUNDUP(para->tXPDLL.ps));
+	const u32 tCKE   = MAX(para->tCKE.ck, PS2CYCLES_ROUNDUP(para->tCKE.ps));
+
+	/*
+	 * self-refresh timings (keep below power-down timings, as tCKESR
+	 * needs to be calculated based on the nCK value of tCKE)
+	 */
+	const u32 tXS    = MAX(para->tXS.ck, PS2CYCLES_ROUNDUP(para->tXS.ps));
+	const u32 tXSDLL = para->tXSDLL;
+	const u32 tCKSRE = MAX(para->tCKSRE.ck,
+			       PS2CYCLES_ROUNDUP(para->tCKSRE.ps));
+	const u32 tCKESR = tCKE + 1;
+	const u32 tCKSRX = MAX(para->tCKSRX.ck,
+			       PS2CYCLES_ROUNDUP(para->tCKSRX.ps));
+
+	/* write leveling timings */
+	const u32 tWLMRD = para->tWLMRD;
+	/* const u32 tWLDQSEN = para->tWLDQSEN; */
+	const u32 tWLO = PS2CYCLES_FLOOR(para->tWLO);
+	/* const u32 tWLOE = PS2CYCLES_FLOOR(para->tWLOE); */
+
+	const u32 tRASmax = tREFI * 9;
+
+	/* clock period in ps, used to pick the CL/CWL table entry */
+	const u32 tCK = 1000000 / CONFIG_DRAM_CLK;
+	u32 i;
+
+	/* The first entry with tCKmin <= tCK < tCKmax wins */
+	for (i = 0; i < para->cl_cwl_numentries; ++i) {
+		if ((para->cl_cwl_table[i].tCKmin <= tCK) &&
+		    (tCK < para->cl_cwl_table[i].tCKmax)) {
+			CL = para->cl_cwl_table[i].CL;
+			CWL = para->cl_cwl_table[i].CWL;
+
+			debug("found CL/CWL: CL = %d, CWL = %d\n", CL, CWL);
+			break;
+		}
+	}
+
+	if ((CL == 0) && (CWL == 0)) {
+		printf("failed to find valid CL/CWL for operating point %d MHz\n",
+		       CONFIG_DRAM_CLK);
+		return 0;
+	}
+
+	if (ch_index == 0) {
+		mctl_ctl = (struct sunxi_mctl_ctl_reg *)SUNXI_DRAM_CTL0_BASE;
+		mctl_phy = (struct sunxi_mctl_phy_reg *)SUNXI_DRAM_PHY0_BASE;
+	} else {
+		mctl_ctl = (struct sunxi_mctl_ctl_reg *)SUNXI_DRAM_CTL1_BASE;
+		mctl_phy = (struct sunxi_mctl_phy_reg *)SUNXI_DRAM_PHY1_BASE;
+	}
+
+	if (para->dram_type == DRAM_TYPE_DDR3) {
+		mr[0] = DDR3_MR0_PPD_FAST_EXIT | DDR3_MR0_WR(tWR) |
+			DDR3_MR0_CL(CL);
+		mr[1] = DDR3_MR1_RTT120OHM;
+		mr[2] = DDR3_MR2_TWL(CWL);
+		mr[3] = 0;
+
+		/*
+		 * DDR3 initialisation requires holding CKE LOW for
+		 * at least 500us prior to starting the initialisation
+		 * sequence and at least 10ns after driving CKE HIGH
+		 * before the initialisation sequence may be started.
+		 *
+		 * Refer to Micron document "TN-41-07: DDR3 Power-Up,
+		 * Initialization, and Reset DDR3 Initialization
+		 * Routine" for details.
+		 */
+		writel(MCTL_INIT0_POST_CKE_x1024(1) |
+		       MCTL_INIT0_PRE_CKE_x1024(
+			    (500 * CONFIG_DRAM_CLK + 1023) / 1024), /* 500us */
+		       &mctl_ctl->init[0]);
+		writel(MCTL_INIT1_DRAM_RSTN_x1024(1),
+		       &mctl_ctl->init[1]);
+		/* INIT2 is not used for DDR3 */
+		writel(MCTL_INIT3_MR(mr[0]) | MCTL_INIT3_EMR(mr[1]),
+		       &mctl_ctl->init[3]);
+		writel(MCTL_INIT4_EMR2(mr[2]) | MCTL_INIT4_EMR3(mr[3]),
+		       &mctl_ctl->init[4]);
+		writel(MCTL_INIT5_DEV_ZQINIT_x32(512 / 32), /* 512 cycles */
+		       &mctl_ctl->init[5]);
+	} else {
+		/* !!! UNTESTED !!! */
+		/*
+		 * LPDDR2 and/or LPDDR3 require a 200us minimum delay
+		 * after driving CKE HIGH in the initialisation sequence.
+		 */
+		writel(MCTL_INIT0_POST_CKE_x1024(
+				(200 * CONFIG_DRAM_CLK + 1023) / 1024),
+		       &mctl_ctl->init[0]);
+		writel(MCTL_INIT1_DRAM_RSTN_x1024(1),
+		       &mctl_ctl->init[1]);
+		writel(MCTL_INIT2_IDLE_AFTER_RESET_x32(
+				(CONFIG_DRAM_CLK + 31) / 32) /* 1us */
+		       | MCTL_INIT2_MIN_STABLE_CLOCK_x1(5),  /* 5 cycles */
+		       &mctl_ctl->init[2]);
+		writel(MCTL_INIT3_MR(mr[1]) | MCTL_INIT3_EMR(mr[2]),
+		       &mctl_ctl->init[3]);
+		writel(MCTL_INIT4_EMR2(mr[3]),
+		       &mctl_ctl->init[4]);
+		writel(MCTL_INIT5_DEV_ZQINIT_x32(
+				(CONFIG_DRAM_CLK + 31) / 32) /* 1us */
+		       | MCTL_INIT5_MAX_AUTO_INIT_x1024(
+				(10 * CONFIG_DRAM_CLK + 1023) / 1024),
+		       &mctl_ctl->init[5]);
+	}
+
+	/* (DDR3) We always use a burst-length of 8. */
+#define MCTL_BL               8
+	/* wr2pre: WL + BL/2 + tWR */
+#define WR2PRE           (MCTL_BL/2 + CWL + tWR)
+	/* wr2rd = CWL + BL/2 + tWTR */
+#define WR2RD            (MCTL_BL/2 + CWL + tWTR)
+	/*
+	 * rd2wr = RL + BL/2 + 2 - WL (for DDR3)
+	 * rd2wr = RL + BL/2 + RU(tDQSCKmax/tCK) + 1 - WL (for LPDDR2/LPDDR3)
+	 */
+#define RD2WR            (CL + MCTL_BL/2 + 2 - CWL)
+#define MCTL_PHY_TRTW        0
+#define MCTL_PHY_TRTODT      0
+
+	/*
+	 * The controller runs at half the PHY frequency, so most controller
+	 * timings are programmed in units of two clock cycles (rounded up).
+	 */
+#define MCTL_DIV2(n)         (((n) + 1) / 2)
+#define MCTL_DIV32(n)        ((n) / 32)
+#define MCTL_DIV1024(n)      ((n) / 1024)
+
+	writel((MCTL_DIV2(WR2PRE) << 24) | (MCTL_DIV2(tFAW) << 16) |
+	       (MCTL_DIV1024(tRASmax) << 8) | (MCTL_DIV2(tRAS) << 0),
+	       &mctl_ctl->dramtmg[0]);
+	writel((MCTL_DIV2(tXP) << 16) | (MCTL_DIV2(tRTP) << 8) |
+	       (MCTL_DIV2(tRC) << 0),
+	       &mctl_ctl->dramtmg[1]);
+	writel((MCTL_DIV2(CWL) << 24) | (MCTL_DIV2(CL) << 16) |
+	       (MCTL_DIV2(RD2WR) << 8) | (MCTL_DIV2(WR2RD) << 0),
+	       &mctl_ctl->dramtmg[2]);
+	/*
+	 * Note: tMRW is located at bit 16 (and up) in DRAMTMG3...
+	 * this is only relevant for LPDDR2/LPDDR3
+	 */
+	writel((MCTL_DIV2(tMRD) << 12) | (MCTL_DIV2(tMOD) << 0),
+	       &mctl_ctl->dramtmg[3]);
+	writel((MCTL_DIV2(tRCD) << 24) | (MCTL_DIV2(tCCD) << 16) |
+	       (MCTL_DIV2(tRRD) << 8) | (MCTL_DIV2(tRP) << 0),
+	       &mctl_ctl->dramtmg[4]);
+	writel((MCTL_DIV2(tCKSRX) << 24) | (MCTL_DIV2(tCKSRE) << 16) |
+	       (MCTL_DIV2(tCKESR) << 8) | (MCTL_DIV2(tCKE) << 0),
+	       &mctl_ctl->dramtmg[5]);
+
+	/* These timings are relevant for LPDDR2/LPDDR3 only */
+	/* writel((MCTL_TCKDPDE << 24) | (MCTL_TCKDPX << 16) |
+	       (MCTL_TCKCSX << 0), &mctl_ctl->dramtmg[6]); */
+
+	/* printf("DRAMTMG7 reset value: 0x%x\n",
+		readl(&mctl_ctl->dramtmg[7])); */
+	/* DRAMTMG7 reset value: 0x202 */
+	/* DRAMTMG7 should contain t_ckpde and t_ckpdx: check reset values!!! */
+	/* printf("DRAMTMG8 reset value: 0x%x\n",
+		readl(&mctl_ctl->dramtmg[8])); */
+	/* DRAMTMG8 reset value: 0x44 */
+
+	writel((MCTL_DIV32(tXSDLL) << 0), &mctl_ctl->dramtmg[8]);
+
+	writel((MCTL_DIV32(tREFI) << 16) | (MCTL_DIV2(tRFC) << 0),
+	       &mctl_ctl->rfshtmg);
+
+	if (para->dram_type == DRAM_TYPE_DDR3) {
+		writel((2 << 24) | ((MCTL_DIV2(CL) - 2) << 16) |
+		       (1 << 8) | ((MCTL_DIV2(CWL) - 2) << 0),
+			&mctl_ctl->dfitmg[0]);
+	} else {
+		/* TODO */
+	}
+
+	/* TODO: handle the case of the write latency domain going to 0 ... */
+
+	/*
+	 * Disable dfi_init_complete_en (the triggering of the SDRAM
+	 * initialisation when the PHY initialisation completes).
+	 */
+	clrbits_le32(&mctl_ctl->dfimisc, MCTL_DFIMISC_DFI_INIT_COMPLETE_EN);
+	/* Disable the automatic generation of DLL calibration requests */
+	setbits_le32(&mctl_ctl->dfiupd[0], MCTL_DFIUPD0_DIS_AUTO_CTRLUPD);
+
+	/* A80-Q7: 2T, 1 rank, DDR3, full-32bit-DQ */
+	/* TODO: make 2T and BUSWIDTH configurable  */
+	writel(MCTL_MSTR_DEVICETYPE(para->dram_type) |
+	       MCTL_MSTR_BURSTLENGTH(para->dram_type) |
+	       MCTL_MSTR_ACTIVERANKS(para->rank) |
+	       MCTL_MSTR_2TMODE | MCTL_MSTR_BUSWIDTH32,
+	       &mctl_ctl->mstr);
+
+	if (para->dram_type == DRAM_TYPE_DDR3) {
+		writel(MCTL_ZQCTRL0_TZQCL(MCTL_DIV2(tZQoper)) |
+		       (MCTL_DIV2(tZQCS)), &mctl_ctl->zqctrl[0]);
+		/*
+		 * TODO: is the following really necessary as the bottom
+		 * half should already be 0x100 and the upper half should
+		 * be ignored for a DDR3 device???
+		 */
+		writel(MCTL_ZQCTRL1_TZQSI_x1024(0x100),
+		       &mctl_ctl->zqctrl[1]);
+	} else {
+		writel(MCTL_ZQCTRL0_TZQCL(0x200) | MCTL_ZQCTRL0_TZQCS(0x40),
+		       &mctl_ctl->zqctrl[0]);
+		writel(MCTL_ZQCTRL1_TZQRESET(0x28) |
+		       MCTL_ZQCTRL1_TZQSI_x1024(0x100),
+		       &mctl_ctl->zqctrl[1]);
+	}
+
+	/* Assert dfi_init_complete signal */
+	setbits_le32(&mctl_ctl->dfimisc, MCTL_DFIMISC_DFI_INIT_COMPLETE_EN);
+	/* Disable auto-refresh */
+	setbits_le32(&mctl_ctl->rfshctl3, MCTL_RFSHCTL3_DIS_AUTO_REFRESH);
+
+	/* PHY initialisation */
+
+	/* TODO: make 2T and 8-bank mode configurable  */
+	writel(MCTL_PHY_DCR_BYTEMASK | MCTL_PHY_DCR_2TMODE |
+	       MCTL_PHY_DCR_DDR8BNK | MCTL_PHY_DRAMMODE_DDR3,
+	       &mctl_phy->dcr);
+
+	/* For LPDDR2 or LPDDR3, set DQSGX to 0 before training. */
+	if (para->dram_type != DRAM_TYPE_DDR3)
+		clrbits_le32(&mctl_phy->dsgcr, (3 << 6));
+
+	writel(mr[0], &mctl_phy->mr0);
+	writel(mr[1], &mctl_phy->mr1);
+	writel(mr[2], &mctl_phy->mr2);
+	writel(mr[3], &mctl_phy->mr3);
+
+	/*
+	 * The DFI PHY is running at full rate. We thus use the actual
+	 * timings in clock cycles here.
+	 */
+	writel((tRC << 26) | (tRRD << 22) | (tRAS << 16) |
+	       (tRCD << 12) | (tRP << 8) | (tWTR << 4) | (tRTP << 0),
+		&mctl_phy->dtpr[0]);
+	/*
+	 * NOTE(review): tMRD is written unmodified at bit 0 while tMOD is
+	 * written as (tMOD - 12) starting at bit 2; confirm the width and
+	 * encoding of the tMRD field against the PHY documentation.
+	 */
+	writel((tMRD << 0) | ((tMOD - 12) << 2) | (tFAW << 5) |
+	       (tRFC << 11) | (tWLMRD << 20) | (tWLO << 26),
+	       &mctl_phy->dtpr[1]);
+	writel((tXS << 0) | (MAX(tXP, tXPDLL) << 10) |
+	       (tCKE << 15) | (tDLLK << 19) |
+	       (MCTL_PHY_TRTODT << 29) | (MCTL_PHY_TRTW << 30) |
+	       (((tCCD - 4) & 0x1) << 31),
+	       &mctl_phy->dtpr[2]);
+
+	/* tDQSCK and tDQSCKmax are used for LPDDR2/LPDDR3 */
+	/* writel((tDQSCK << 0) | (tDQSCKMAX << 3), &mctl_phy->dtpr[3]); */
+
+	/*
+	 * We use the same values used by Allwinner's Boot0 for the PTR
+	 * (PHY timing register) configuration that is tied to the PHY
+	 * implementation.
+	 */
+	writel(0x42C21590, &mctl_phy->ptr[0]);
+	writel(0xD05612C0, &mctl_phy->ptr[1]);
+	if (para->dram_type == DRAM_TYPE_DDR3) {
+		const unsigned int tdinit0 = 500 * CONFIG_DRAM_CLK; /* 500us */
+		const unsigned int tdinit1 = (360 * CONFIG_DRAM_CLK + 999) /
+			1000; /* 360ns */
+		const unsigned int tdinit2 = 200 * CONFIG_DRAM_CLK; /* 200us */
+		const unsigned int tdinit3 = CONFIG_DRAM_CLK; /* 1us */
+
+		writel((tdinit1 << 20) | tdinit0, &mctl_phy->ptr[3]);
+		writel((tdinit3 << 18) | tdinit2, &mctl_phy->ptr[4]);
+	} else {
+		/* LPDDR2 or LPDDR3 */
+		const unsigned int tdinit0 = (100 * CONFIG_DRAM_CLK + 999) /
+			1000; /* 100ns */
+		const unsigned int tdinit1 = 200 * CONFIG_DRAM_CLK; /* 200us */
+		/*
+		 * NOTE(review): this was originally annotated as 11us, but
+		 * 22 * CONFIG_DRAM_CLK corresponds to 22us; confirm which
+		 * one is intended (this path is untested).
+		 */
+		const unsigned int tdinit2 = 22 * CONFIG_DRAM_CLK;
+		const unsigned int tdinit3 = 2 * CONFIG_DRAM_CLK; /* 2us */
+
+		writel((tdinit1 << 20) | tdinit0, &mctl_phy->ptr[3]);
+		writel((tdinit3 << 18) | tdinit2, &mctl_phy->ptr[4]);
+	}
+
+	/* TEST ME */
+	writel(0x00203131, &mctl_phy->acmdlr);
+
+	/* TODO: can we enable this for 2 ranks, even when we don't know yet */
+	writel(MCTL_DTCR_DEFAULT | MCTL_DTCR_RANKEN(para->rank),
+	       &mctl_phy->dtcr);
+
+	/* TODO: half width */
+	debug("DX2GCR0 reset: 0x%x\n", readl(&mctl_phy->dx[2].gcr[0]));
+	writel(0x7C000285, &mctl_phy->dx[2].gcr[0]);
+	writel(0x7C000285, &mctl_phy->dx[3].gcr[0]);
+
+	/* One byte of CONFIG_DRAM_ZQ per ZQ calibration block */
+	clrsetbits_le32(&mctl_phy->zq[0].pr, 0xff,
+			(CONFIG_DRAM_ZQ >>  0) & 0xff);  /* CK/CA */
+	clrsetbits_le32(&mctl_phy->zq[1].pr, 0xff,
+			(CONFIG_DRAM_ZQ >>  8) & 0xff);  /* DX0/DX1 */
+	clrsetbits_le32(&mctl_phy->zq[2].pr, 0xff,
+			(CONFIG_DRAM_ZQ >> 16) & 0xff);  /* DX2/DX3 */
+
+	/* TODO: make configurable & implement non-ODT path */
+	if (1) {
+		int lane;
+		for (lane = 0; lane < 4; ++lane) {
+			clrbits_le32(&mctl_phy->dx[lane].gcr[2], 0xffff);
+			clrbits_le32(&mctl_phy->dx[lane].gcr[3],
+				     (0x3<<12) | (0x3<<4));
+		}
+	} else {
+		/* TODO: check */
+		int lane;
+		for (lane = 0; lane < 4; ++lane) {
+			clrsetbits_le32(&mctl_phy->dx[lane].gcr[2], 0xffff,
+					0xaaaa);
+			if (para->dram_type == DRAM_TYPE_DDR3)
+				setbits_le32(&mctl_phy->dx[lane].gcr[3],
+					     (0x3<<12) | (0x3<<4));
+			else
+				setbits_le32(&mctl_phy->dx[lane].gcr[3],
+					     0x00000012);
+		}
+	}
+
+	writel(0x04058D02, &mctl_phy->zq[0].cr); /* CK/CA */
+	writel(0x04058D02, &mctl_phy->zq[1].cr); /* DX0/DX1 */
+	writel(0x04058D02, &mctl_phy->zq[2].cr); /* DX2/DX3 */
+
+	/* Disable auto-refresh prior to data training */
+	setbits_le32(&mctl_ctl->rfshctl3, MCTL_RFSHCTL3_DIS_AUTO_REFRESH);
+
+	setbits_le32(&mctl_phy->dsgcr, 0xf << 24); /* unclear what this is... */
+	/* TODO: IODDRM (IO DDR-MODE) for DDR3L */
+	clrsetbits_le32(&mctl_phy->pgcr[1],
+			MCTL_PGCR1_ZCKSEL_MASK,
+			MCTL_PGCR1_IODDRM_DDR3 | MCTL_PGCR1_INHVT_EN);
+
+	setbits_le32(&mctl_phy->pllcr, 0x3 << 19); /* PLL frequency select */
+	/* TODO: single-channel PLL mode??? missing */
+	setbits_le32(&mctl_phy->pllcr,
+		     MCTL_PLLGCR_PLL_BYPASS | MCTL_PLLGCR_PLL_POWERDOWN);
+	/* setbits_le32(&mctl_phy->pir, MCTL_PIR_PLL_BYPASS); included below */
+
+	/* Disable VT compensation */
+	clrbits_le32(&mctl_phy->pgcr[0], 0x3f);
+
+	/* TODO: "other" PLL mode ... 0x20000 seems to be the PLL Bypass */
+	if (para->dram_type == DRAM_TYPE_DDR3)
+		clrsetbits_le32(&mctl_phy->pir, MCTL_PIR_MASK, 0x20df3);
+	else
+		clrsetbits_le32(&mctl_phy->pir, MCTL_PIR_MASK, 0x2c573);
+
+	sdelay(10000); /* XXX necessary? */
+
+	/*
+	 * Spin until a read of PIR returns with MCTL_PIR_INIT set.
+	 * NOTE(review): this was originally described as waiting for the
+	 * INIT bit to clear itself, which does not match the loop condition;
+	 * confirm the intended polarity against the PHY documentation.
+	 */
+	while ((readl(&mctl_phy->pir) & MCTL_PIR_INIT) != MCTL_PIR_INIT) {
+		/* not done yet -- keep spinning */
+		debug("MCTL_PIR_INIT not set\n");
+		sdelay(1000);
+		/* TODO: implement timeout */
+	}
+
+	/* TODO: not used --- there's a "2rank debug" section here */
+
+	/* Original dram init code which may come in handy later
+	********************************************************
+	 * LPDDR2 and LPDDR3 *
+	if ((para->dram_type) == 6 || (para->dram_type) == 7) {
+		reg_val = mctl_read_w(P0_DSGCR + ch_offset);
+		reg_val &= (~(0x3<<6));		* set DQSGX to 1 *
+		reg_val |= (0x1<<6);		* dqs gate extend *
+		mctl_write_w(P0_DSGCR + ch_offset, reg_val);
+		dram_dbg("DQS Gate Extend Enable!\n", ch_index);
+	}
+
+	 * Disable ZCAL after initial--for nand dma debug--20140330 by YSZ *
+	if (para->dram_tpr13 & (0x1<<31)) {
+		reg_val = mctl_read_w(P0_ZQ0CR + ch_offset);
+		reg_val |= (0x7<<11);
+		mctl_write_w(P0_ZQ0CR + ch_offset, reg_val);
+	}
+	********************************************************
+	*/
+
+	/*
+	 * TODO: more 2-rank support
+	 * (setting the "dqs gate delay to average between 2 rank")
+	 */
+
+	/* check if any errors are set */
+	if (readl(&mctl_phy->pgsr[0]) & MCTL_PGSR0_ERRORS) {
+		debug("Channel %d unavailable!\n", ch_index);
+		return 0;
+	} else {
+		/* initial OK */
+		debug("Channel %d OK!\n", ch_index);
+		/* return 1; */
+	}
+
+	/* Wait for bit 0 of the controller's STAT register to be set */
+	while ((readl(&mctl_ctl->stat) & 0x1) != 0x1) {
+		debug("Waiting for INIT to be done (controller to come up into 'normal operating' mode\n");
+		sdelay(100000);
+		/* init not done */
+		/* TODO: implement time-out */
+	}
+	debug("done\n");
+
+	/* "DDR is controlled by the controller" */
+	clrbits_le32(&mctl_phy->pgcr[3], (1 << 25));
+
+	/* TODO: is the following necessary? */
+	debug("DFIMISC before writing 0: 0x%x\n", readl(&mctl_ctl->dfimisc));
+	writel(0, &mctl_ctl->dfimisc);
+
+	/* Enable auto-refresh */
+	clrbits_le32(&mctl_ctl->rfshctl3, MCTL_RFSHCTL3_DIS_AUTO_REFRESH);
+
+	debug("channel_init complete\n");
+	return 1;
+}
+
+/*
+ * Derive the DRAM size from the geometry fields of the common control
+ * register (CR).
+ *
+ * Each field contributes to a power-of-two exponent; the return value is
+ * 1 << exponent, which the caller treats as a size in MBytes.
+ */
+signed int DRAMC_get_dram_size(void)
+{
+	struct sunxi_mctl_com_reg * const mctl_com =
+		(struct sunxi_mctl_com_reg *)SUNXI_DRAM_COM_BASE;
+	const unsigned int cr = readl(&mctl_com->cr);
+
+	/* Raw field codes as stored in CR */
+	const unsigned int page_code = (cr >> 8) & 0xf;	 /* page size */
+	const unsigned int row_code  = (cr >> 4) & 0xf;	 /* row width */
+	const unsigned int bank_code = (cr >> 2) & 0x3;	 /* bank number */
+	const unsigned int rank_code = cr & 0x3;	 /* rank number */
+	const unsigned int chan_code = (cr >> 19) & 0x1; /* channel number */
+	unsigned int log2_mbytes;
+
+	/* Sum of the per-field exponents, in units of 512 bytes ... */
+	log2_mbytes = (page_code - 6) + (row_code + 1) + (bank_code + 2) +
+		      rank_code + chan_code;
+	/* ... rebased to MBytes */
+	log2_mbytes -= 11;
+
+	return 1 << log2_mbytes;
+}
+
+/*
+ * Bring up the A80 DRAM controllers with a fixed DDR3 parameter set and
+ * report the amount of DRAM found.
+ *
+ * Both channels are initialised; if channel 1 fails, the configuration
+ * falls back to single-channel operation on channel 0.
+ *
+ * Returns the DRAM size in bytes, or 0 if channel 0 failed to initialise.
+ */
+unsigned long sunxi_dram_init(void)
+{
+	struct sunxi_mctl_com_reg * const mctl_com =
+		(struct sunxi_mctl_com_reg *)SUNXI_DRAM_COM_BASE;
+
+	/* Scanned in order; the first row with tCKmin <= tCK < tCKmax is used */
+	struct dram_sun9i_cl_cwl_timing cl_cwl[] = {
+		{ .CL =  5, .CWL = 5, .tCKmin = 3000, .tCKmax = 3300 },
+		{ .CL =  6, .CWL = 5, .tCKmin = 2500, .tCKmax = 3300 },
+		{ .CL =  8, .CWL = 6, .tCKmin = 1875, .tCKmax = 2500 },
+		{ .CL = 10, .CWL = 7, .tCKmin = 1500, .tCKmax = 1875 },
+		{ .CL = 11, .CWL = 8, .tCKmin = 1250, .tCKmax = 1500 }
+	};
+
+	/*
+	 * Initial parameters; .chan is reduced to 1 below if the second
+	 * channel fails to initialise.
+	 */
+	struct dram_sun9i_para para = {
+		.dram_type = DRAM_TYPE_DDR3,
+		.bus_width = 32,
+		.chan = 2,
+		.rank = 1,
+		/* .rank = 2, */
+		.page_size = 4096,
+		/* .rows = 16, */
+		.rows = 15,
+
+		/* CL/CWL table for the speed bin */
+		.cl_cwl_table = cl_cwl,
+		.cl_cwl_numentries = sizeof(cl_cwl) / sizeof(cl_cwl[0]),
+
+		/* timings */
+		.tREFI = 7800,	/* 7.8us (up to 85 degC) */
+		.tRFC  = 260,	/* 260ns for 4GBit devices */
+				/* 350ns @ 8GBit */
+
+		.tRCD  = 13750,
+		.tRP   = 13750,
+		.tRC   = 48750,
+		.tRAS  = 35000,
+
+		.tDLLK = 512,
+		.tRTP  = { .ck = 4, .ps = 7500 },
+		.tWTR  = { .ck = 4, .ps = 7500 },
+		.tWR   = 15,
+		.tMRD  = 4,
+		.tMOD  = { .ck = 12, .ps = 15000 },
+		.tCCD  = 4,
+		.tRRD  = { .ck = 4, .ps = 7500 },
+		/*
+		 * 40ns; the field is in ps and is converted with
+		 * PS2CYCLES_ROUNDUP, so it must be given as 40000.
+		 */
+		.tFAW  = 40000,
+
+		/* calibration timing */
+		/* .tZQinit = { .ck = 512, .ps = 640000 }, */
+		.tZQoper = { .ck = 256, .ps = 320000 },
+		.tZQCS   = { .ck = 64,  .ps = 80000 },
+
+		/* reset timing */
+		/* .tXPR  = { .ck = 5, .ps = 10000 }, */
+
+		/* self-refresh timings */
+		.tXS  = { .ck = 5, .ps = 10000 },
+		.tXSDLL = 512,
+		.tCKSRE = { .ck = 5, .ps = 10000 },
+		.tCKSRX = { .ck = 5, .ps = 10000 },
+
+		/* power-down timings */
+		.tXP = { .ck = 3, .ps = 6000 },
+		.tXPDLL = { .ck = 10, .ps = 24000 },
+		.tCKE = { .ck = 3, .ps = 5000 },
+
+		/* write leveling timings */
+		.tWLMRD = 40,
+		/* .tWLDQSEN = 25, */
+		.tWLO = 7500,
+		/* .tWLOE = 2000, */
+	};
+
+	/*
+	 * Disable A80 internal 240 ohm resistor.
+	 *
+	 * This code sequence is adapted from Allwinner's Boot0 (see
+	 * https://github.com/allwinner-zh/bootloader.git), as there
+	 * is no documentation for these two registers in the R_PRCM
+	 * block.
+	 */
+	setbits_le32(SUNXI_PRCM_BASE + 0x1e0, (0x3 << 8));
+	writel(0, SUNXI_PRCM_BASE + 0x1e8);
+
+	mctl_sys_init();
+
+	if (!mctl_channel_init(0, &para))
+		return 0;
+
+	/* dual-channel */
+	if (!mctl_channel_init(1, &para)) {
+		/*
+		 * Fall back to single-channel operation.  mctl_com_init()
+		 * below rewrites CR from para, so the channel count has to
+		 * be updated there as well, otherwise dual-channel mode
+		 * would be selected again.
+		 */
+		para.chan = 1;
+		clrsetbits_le32(&mctl_com->cr, MCTL_CR_CHANNEL_MASK,
+				MCTL_CR_CHANNEL_SINGLE);
+		/* disable channel 1 global clock (the enable bit is in CCR) */
+		clrbits_le32(&mctl_com->ccr, MCTL_CCR_CH1_CLK_EN);
+	}
+
+	mctl_com_init(&para);
+
+	/*
+	 * Return the RAM size in bytes; convert to unsigned long before
+	 * shifting so that sizes of 2048 MBytes and above do not overflow
+	 * a signed int.
+	 */
+	return (unsigned long)DRAMC_get_dram_size() << 20;
+}
diff --git a/arch/arm/mach-sunxi/gtbus_sun9i.c b/arch/arm/mach-sunxi/gtbus_sun9i.c
new file mode 100644
index 0000000..c20d3c0
--- /dev/null
+++ b/arch/arm/mach-sunxi/gtbus_sun9i.c
@@ -0,0 +1,48 @@
+/*
+ * GTBUS initialisation for sun9i
+ *
+ * (C) Copyright 2016 Theobroma Systems Design und Consulting GmbH
+ *                    Philipp Tomsich <philipp.tomsich@theobroma-systems.com>
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#include <common.h>
+#include <asm/io.h>
+#include <asm/arch/gtbus_sun9i.h>
+#include <asm/arch/sys_proto.h>
+
+#ifdef CONFIG_SPL_BUILD
+
+/* Set up the sun9i GTBUS: master read priorities and configuration. */
+void gtbus_init(void)
+{
+	struct sunxi_gtbus_reg * const gtbus =
+		(struct sunxi_gtbus_reg *)SUNXI_GTBUS_BASE;
+
+	/*
+	 * Same settings as Allwinner's Boot0 for now. Other workloads (e.g.
+	 * headless use cases focused on IO throughput) may want to tune them.
+	 */
+	writel((GT_PRIO_HIGH << GT_PORT_FE0) |
+	       (GT_PRIO_HIGH << GT_PORT_BE1) |
+	       (GT_PRIO_HIGH << GT_PORT_BE2) |
+	       (GT_PRIO_HIGH << GT_PORT_IEP0) |
+	       (GT_PRIO_HIGH << GT_PORT_FE1) |
+	       (GT_PRIO_HIGH << GT_PORT_BE0) |
+	       (GT_PRIO_HIGH << GT_PORT_FE2) |
+	       (GT_PRIO_HIGH << GT_PORT_IEP1),
+	       &gtbus->mst_read_prio_cfg[0]);
+
+	/* Apply the default master configuration to the same ports. */
+	writel(GP_MST_CFG_DEFAULT, &gtbus->mst_cfg[GT_PORT_FE0]);
+	writel(GP_MST_CFG_DEFAULT, &gtbus->mst_cfg[GT_PORT_BE1]);
+	writel(GP_MST_CFG_DEFAULT, &gtbus->mst_cfg[GT_PORT_BE2]);
+	writel(GP_MST_CFG_DEFAULT, &gtbus->mst_cfg[GT_PORT_IEP0]);
+	writel(GP_MST_CFG_DEFAULT, &gtbus->mst_cfg[GT_PORT_FE1]);
+	writel(GP_MST_CFG_DEFAULT, &gtbus->mst_cfg[GT_PORT_BE0]);
+	writel(GP_MST_CFG_DEFAULT, &gtbus->mst_cfg[GT_PORT_FE2]);
+	writel(GP_MST_CFG_DEFAULT, &gtbus->mst_cfg[GT_PORT_IEP1]);
+}
+
+#endif
diff --git a/board/sunxi/Kconfig b/board/sunxi/Kconfig
index c0ffeb3..e1d4ab1 100644
--- a/board/sunxi/Kconfig
+++ b/board/sunxi/Kconfig
@@ -119,6 +119,7 @@
 	bool "sun9i (Allwinner A80)"
 	select CPU_V7
 	select SUNXI_GEN_SUN6I
+	select SUPPORT_SPL
 
 config MACH_SUN50I
 	bool "sun50i (Allwinner A64)"
@@ -141,11 +142,13 @@
 
 config DRAM_CLK
 	int "sunxi dram clock speed"
+	default 792 if MACH_SUN9I
 	default 312 if MACH_SUN6I || MACH_SUN8I
 	default 360 if MACH_SUN4I || MACH_SUN5I || MACH_SUN7I
 	---help---
-	Set the dram clock speed, valid range 240 - 480, must be a multiple
-	of 24.
+	Set the dram clock speed, valid range 240 - 480 (prior to sun9i),
+	must be a multiple of 24. For the sun9i (A80), the tested values
+	(for DDR3-1600) are 312 to 792.
 
 if MACH_SUN5I || MACH_SUN7I
 config DRAM_MBUS_CLK
@@ -160,6 +163,7 @@
 	int "sunxi dram zq value"
 	default 123 if MACH_SUN4I || MACH_SUN5I || MACH_SUN6I || MACH_SUN8I
 	default 127 if MACH_SUN7I
+	default 4145117 if MACH_SUN9I
 	---help---
 	Set the dram zq value.
 
@@ -252,7 +256,7 @@
 config SYS_CLK_FREQ
 	default 816000000 if MACH_SUN50I
 	default 912000000 if MACH_SUN7I
-	default 1008000000 if MACH_SUN4I || MACH_SUN5I || MACH_SUN6I || MACH_SUN8I
+	default 1008000000 if MACH_SUN4I || MACH_SUN5I || MACH_SUN6I || MACH_SUN8I || MACH_SUN9I
 
 config SYS_CONFIG_NAME
 	default "sun4i" if MACH_SUN4I
diff --git a/board/sunxi/MAINTAINERS b/board/sunxi/MAINTAINERS
index f7129b7..d7dc55b 100644
--- a/board/sunxi/MAINTAINERS
+++ b/board/sunxi/MAINTAINERS
@@ -94,6 +94,11 @@
 S:	Maintained
 F:	configs/A33-OLinuXino_defconfig
 
+A80 OPTIMUS BOARD
+M:	Chen-Yu Tsai <wens@csie.org>
+S:	Maintained
+F:	configs/Merrii_A80_Optimus_defconfig
+
 AINOL AW1 BOARD
 M:	Paul Kocialkowski <contact@paulk.fr>
 S:	Maintained
@@ -117,6 +122,11 @@
 F:	configs/Cubieboard2_defconfig
 F:	configs/Cubietruck_defconfig
 
+CUBIEBOARD4 BOARD
+M:	Chen-Yu Tsai <wens@csie.org>
+S:	Maintained
+F:	configs/Cubieboard4_defconfig
+
 CUBIETRUCK-PLUS BOARD
 M:	Chen-Yu Tsai <wens@csie.org>
 S:	Maintained
diff --git a/board/sunxi/board.c b/board/sunxi/board.c
index 6281c9d..5365638 100644
--- a/board/sunxi/board.c
+++ b/board/sunxi/board.c
@@ -307,6 +307,13 @@
 			sunxi_gpio_set_pull(pin, SUNXI_GPIO_PULL_UP);
 			sunxi_gpio_set_drv(pin, 2);
 		}
+#elif defined(CONFIG_MACH_SUN9I)
+		/* SDC2: PC6-PC16 */
+		for (pin = SUNXI_GPC(6); pin <= SUNXI_GPC(16); pin++) {
+			sunxi_gpio_set_cfgpin(pin, SUNXI_GPC_SDC2);
+			sunxi_gpio_set_pull(pin, SUNXI_GPIO_PULL_UP);
+			sunxi_gpio_set_drv(pin, 2);
+		}
 #endif
 		break;
 
diff --git a/configs/Cubieboard4_defconfig b/configs/Cubieboard4_defconfig
new file mode 100644
index 0000000..4d36d39
--- /dev/null
+++ b/configs/Cubieboard4_defconfig
@@ -0,0 +1,18 @@
+CONFIG_ARM=y
+CONFIG_ARCH_SUNXI=y
+CONFIG_MACH_SUN9I=y
+CONFIG_DRAM_CLK=672
+CONFIG_MMC0_CD_PIN="PH18"
+CONFIG_MMC_SUNXI_SLOT_EXTRA=2
+CONFIG_USB0_VBUS_PIN="AXP0-VBUS-ENABLE"
+CONFIG_USB0_VBUS_DET="AXP0-VBUS-DETECT"
+CONFIG_USB0_ID_DET="PH16"
+CONFIG_USB1_VBUS_PIN="PH14"
+CONFIG_USB3_VBUS_PIN="PH15"
+CONFIG_AXP_GPIO=y
+CONFIG_DEFAULT_DEVICE_TREE="sun9i-a80-cubieboard4"
+CONFIG_SPL=y
+# CONFIG_CMD_IMLS is not set
+# CONFIG_CMD_FLASH is not set
+# CONFIG_CMD_FPGA is not set
+CONFIG_AXP809_POWER=y
diff --git a/configs/Merrii_A80_Optimus_defconfig b/configs/Merrii_A80_Optimus_defconfig
index 774d137..6397de5 100644
--- a/configs/Merrii_A80_Optimus_defconfig
+++ b/configs/Merrii_A80_Optimus_defconfig
@@ -1,13 +1,18 @@
 CONFIG_ARM=y
 CONFIG_ARCH_SUNXI=y
 CONFIG_MACH_SUN9I=y
-CONFIG_DRAM_CLK=360
-CONFIG_DRAM_ZQ=123
-CONFIG_SYS_CLK_FREQ=1008000000
+CONFIG_DRAM_CLK=672
 CONFIG_MMC0_CD_PIN="PH18"
+CONFIG_MMC_SUNXI_SLOT_EXTRA=2
+CONFIG_USB0_VBUS_PIN="AXP0-VBUS-ENABLE"
+CONFIG_USB0_VBUS_DET="AXP0-VBUS-DETECT"
+CONFIG_USB0_ID_DET="PH3"
+CONFIG_USB1_VBUS_PIN="PH4"
+CONFIG_USB3_VBUS_PIN="PH5"
+CONFIG_AXP_GPIO=y
 CONFIG_DEFAULT_DEVICE_TREE="sun9i-a80-optimus"
-# CONFIG_SYS_MALLOC_CLEAR_ON_INIT is not set
-CONFIG_CONSOLE_MUX=y
+CONFIG_SPL=y
 # CONFIG_CMD_IMLS is not set
 # CONFIG_CMD_FLASH is not set
 # CONFIG_CMD_FPGA is not set
+CONFIG_AXP809_POWER=y