Merge branch 'master' of git://git.denx.de/u-boot-socfpga
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 7a77b6a..77eab66 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -64,6 +64,20 @@
         default "sa1100" if CPU_SA1100
 	default "armv8" if ARM64
 
+config SYS_ARM_ARCH
+	int
+	default 4 if CPU_ARM720T
+	default 4 if CPU_ARM920T
+	default 5 if CPU_ARM926EJS
+	default 5 if CPU_ARM946ES
+	default 6 if CPU_ARM1136
+	default 6 if CPU_ARM1176
+	default 7 if CPU_V7
+	default 7 if CPU_V7M
+	default 5 if CPU_PXA
+	default 4 if CPU_SA1100
+	default 8 if ARM64
+
 config SEMIHOSTING
 	bool "support boot from semihosting"
 	help
@@ -766,6 +780,7 @@
 
 config TARGET_THUNDERX_88XX
 	bool "Support ThunderX 88xx"
+	select ARM64
 	select OF_CONTROL
 
 endchoice
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index ecd1887..6a07cd1 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -11,7 +11,7 @@
 arch-$(CONFIG_CPU_ARM720T)	=-march=armv4
 arch-$(CONFIG_CPU_ARM920T)	=-march=armv4t
 arch-$(CONFIG_CPU_ARM926EJS)	=-march=armv5te
-arch-$(CONFIG_CPU_ARM946ES)	=-march=armv4
+arch-$(CONFIG_CPU_ARM946ES)	=-march=armv5te
 arch-$(CONFIG_CPU_SA1100)	=-march=armv4
 arch-$(CONFIG_CPU_PXA)		=
 arch-$(CONFIG_CPU_ARM1136)	=-march=armv5
diff --git a/arch/arm/cpu/armv7/am33xx/clock_am43xx.c b/arch/arm/cpu/armv7/am33xx/clock_am43xx.c
index 5c2a2ab..73ea955 100644
--- a/arch/arm/cpu/armv7/am33xx/clock_am43xx.c
+++ b/arch/arm/cpu/armv7/am33xx/clock_am43xx.c
@@ -160,7 +160,7 @@
 }
 #endif
 
-#ifdef CONFIG_USB_DWC3
+#if defined(CONFIG_USB_DWC3) || defined(CONFIG_USB_XHCI_OMAP)
 void enable_usb_clocks(int index)
 {
 	u32 *usbclkctrl = 0;
diff --git a/arch/arm/cpu/armv7/omap-common/hwinit-common.c b/arch/arm/cpu/armv7/omap-common/hwinit-common.c
index 078bdd8..2f9693f 100644
--- a/arch/arm/cpu/armv7/omap-common/hwinit-common.c
+++ b/arch/arm/cpu/armv7/omap-common/hwinit-common.c
@@ -112,6 +112,16 @@
 {
 }
 
+/**
+ * vcores_init() - Assign omap_vcores based on board
+ *
+ * Function to pick the vcores based on board. This is expected to be
+ * overridden in the SoC family board file where desired.
+ */
+void __weak vcores_init(void)
+{
+}
+
 void s_init(void)
 {
 }
@@ -149,6 +159,7 @@
 #endif
 	setup_early_clocks();
 	do_board_detect();
+	vcores_init();
 	prcm_init();
 }
 
diff --git a/arch/arm/cpu/armv7/omap5/hw_data.c b/arch/arm/cpu/armv7/omap5/hw_data.c
index 88e8920..5b91446a 100644
--- a/arch/arm/cpu/armv7/omap5/hw_data.c
+++ b/arch/arm/cpu/armv7/omap5/hw_data.c
@@ -365,35 +365,35 @@
 };
 
 struct vcores_data dra752_volts = {
-	.mpu.value	= VDD_MPU_DRA752,
-	.mpu.efuse.reg	= STD_FUSE_OPP_VMIN_MPU_NOM,
+	.mpu.value	= VDD_MPU_DRA7,
+	.mpu.efuse.reg	= STD_FUSE_OPP_VMIN_MPU,
 	.mpu.efuse.reg_bits	= DRA752_EFUSE_REGBITS,
 	.mpu.addr	= TPS659038_REG_ADDR_SMPS12,
 	.mpu.pmic	= &tps659038,
 	.mpu.abb_tx_done_mask = OMAP_ABB_MPU_TXDONE_MASK,
 
-	.eve.value	= VDD_EVE_DRA752,
-	.eve.efuse.reg	= STD_FUSE_OPP_VMIN_DSPEVE_NOM,
+	.eve.value	= VDD_EVE_DRA7,
+	.eve.efuse.reg	= STD_FUSE_OPP_VMIN_DSPEVE,
 	.eve.efuse.reg_bits	= DRA752_EFUSE_REGBITS,
 	.eve.addr	= TPS659038_REG_ADDR_SMPS45,
 	.eve.pmic	= &tps659038,
 	.eve.abb_tx_done_mask = OMAP_ABB_EVE_TXDONE_MASK,
 
-	.gpu.value	= VDD_GPU_DRA752,
-	.gpu.efuse.reg	= STD_FUSE_OPP_VMIN_GPU_NOM,
+	.gpu.value	= VDD_GPU_DRA7,
+	.gpu.efuse.reg	= STD_FUSE_OPP_VMIN_GPU,
 	.gpu.efuse.reg_bits	= DRA752_EFUSE_REGBITS,
 	.gpu.addr	= TPS659038_REG_ADDR_SMPS6,
 	.gpu.pmic	= &tps659038,
 	.gpu.abb_tx_done_mask = OMAP_ABB_GPU_TXDONE_MASK,
 
-	.core.value	= VDD_CORE_DRA752,
-	.core.efuse.reg	= STD_FUSE_OPP_VMIN_CORE_NOM,
+	.core.value	= VDD_CORE_DRA7,
+	.core.efuse.reg	= STD_FUSE_OPP_VMIN_CORE,
 	.core.efuse.reg_bits = DRA752_EFUSE_REGBITS,
 	.core.addr	= TPS659038_REG_ADDR_SMPS7,
 	.core.pmic	= &tps659038,
 
-	.iva.value	= VDD_IVA_DRA752,
-	.iva.efuse.reg	= STD_FUSE_OPP_VMIN_IVA_NOM,
+	.iva.value	= VDD_IVA_DRA7,
+	.iva.efuse.reg	= STD_FUSE_OPP_VMIN_IVA,
 	.iva.efuse.reg_bits	= DRA752_EFUSE_REGBITS,
 	.iva.addr	= TPS659038_REG_ADDR_SMPS8,
 	.iva.pmic	= &tps659038,
@@ -401,15 +401,15 @@
 };
 
 struct vcores_data dra722_volts = {
-	.mpu.value	= VDD_MPU_DRA72x,
-	.mpu.efuse.reg	= STD_FUSE_OPP_VMIN_MPU_NOM,
+	.mpu.value	= VDD_MPU_DRA7,
+	.mpu.efuse.reg	= STD_FUSE_OPP_VMIN_MPU,
 	.mpu.efuse.reg_bits = DRA752_EFUSE_REGBITS,
 	.mpu.addr	= TPS65917_REG_ADDR_SMPS1,
 	.mpu.pmic	= &tps659038,
 	.mpu.abb_tx_done_mask = OMAP_ABB_MPU_TXDONE_MASK,
 
-	.core.value	= VDD_CORE_DRA72x,
-	.core.efuse.reg	= STD_FUSE_OPP_VMIN_CORE_NOM,
+	.core.value	= VDD_CORE_DRA7,
+	.core.efuse.reg	= STD_FUSE_OPP_VMIN_CORE,
 	.core.efuse.reg_bits = DRA752_EFUSE_REGBITS,
 	.core.addr	= TPS65917_REG_ADDR_SMPS2,
 	.core.pmic	= &tps659038,
@@ -418,22 +418,22 @@
 	 * The DSPEVE, GPU and IVA rails are usually grouped on DRA72x
 	 * designs and powered by TPS65917 SMPS3, as on the J6Eco EVM.
 	 */
-	.gpu.value	= VDD_GPU_DRA72x,
-	.gpu.efuse.reg	= STD_FUSE_OPP_VMIN_GPU_NOM,
+	.gpu.value	= VDD_GPU_DRA7,
+	.gpu.efuse.reg	= STD_FUSE_OPP_VMIN_GPU,
 	.gpu.efuse.reg_bits = DRA752_EFUSE_REGBITS,
 	.gpu.addr	= TPS65917_REG_ADDR_SMPS3,
 	.gpu.pmic	= &tps659038,
 	.gpu.abb_tx_done_mask = OMAP_ABB_GPU_TXDONE_MASK,
 
-	.eve.value	= VDD_EVE_DRA72x,
-	.eve.efuse.reg	= STD_FUSE_OPP_VMIN_DSPEVE_NOM,
+	.eve.value	= VDD_EVE_DRA7,
+	.eve.efuse.reg	= STD_FUSE_OPP_VMIN_DSPEVE,
 	.eve.efuse.reg_bits = DRA752_EFUSE_REGBITS,
 	.eve.addr	= TPS65917_REG_ADDR_SMPS3,
 	.eve.pmic	= &tps659038,
 	.eve.abb_tx_done_mask = OMAP_ABB_EVE_TXDONE_MASK,
 
-	.iva.value	= VDD_IVA_DRA72x,
-	.iva.efuse.reg	= STD_FUSE_OPP_VMIN_IVA_NOM,
+	.iva.value	= VDD_IVA_DRA7,
+	.iva.efuse.reg	= STD_FUSE_OPP_VMIN_IVA,
 	.iva.efuse.reg_bits = DRA752_EFUSE_REGBITS,
 	.iva.addr	= TPS65917_REG_ADDR_SMPS3,
 	.iva.pmic	= &tps659038,
@@ -602,7 +602,7 @@
 }
 #endif
 
-#ifdef CONFIG_USB_DWC3
+#if defined(CONFIG_USB_DWC3) || defined(CONFIG_USB_XHCI_OMAP)
 void enable_usb_clocks(int index)
 {
 	u32 cm_l3init_usb_otg_ss_clkctrl = 0;
@@ -614,9 +614,14 @@
 		setbits_le32((*prcm)->cm_l3init_usb_otg_ss1_clkctrl,
 			     OPTFCLKEN_REFCLK960M);
 
-		/* Enable 32 KHz clock for dwc3 */
+		/* Enable 32 KHz clock for USB_PHY1 */
 		setbits_le32((*prcm)->cm_coreaon_usb_phy1_core_clkctrl,
 			     USBPHY_CORE_CLKCTRL_OPTFCLKEN_CLK32K);
+
+		/* Enable 32 KHz clock for USB_PHY3 */
+		if (is_dra7xx())
+			setbits_le32((*prcm)->cm_coreaon_usb_phy3_core_clkctrl,
+				     USBPHY_CORE_CLKCTRL_OPTFCLKEN_CLK32K);
 	} else if (index == 1) {
 		cm_l3init_usb_otg_ss_clkctrl =
 			(*prcm)->cm_l3init_usb_otg_ss2_clkctrl;
@@ -664,9 +669,14 @@
 		clrbits_le32((*prcm)->cm_l3init_usb_otg_ss1_clkctrl,
 			     OPTFCLKEN_REFCLK960M);
 
-		/* Disable 32 KHz clock for dwc3 */
+		/* Disable 32 KHz clock for USB_PHY1 */
 		clrbits_le32((*prcm)->cm_coreaon_usb_phy1_core_clkctrl,
 			     USBPHY_CORE_CLKCTRL_OPTFCLKEN_CLK32K);
+
+		/* Disable 32 KHz clock for USB_PHY3 */
+		if (is_dra7xx())
+			clrbits_le32((*prcm)->cm_coreaon_usb_phy3_core_clkctrl,
+				     USBPHY_CORE_CLKCTRL_OPTFCLKEN_CLK32K);
 	} else if (index == 1) {
 		cm_l3init_usb_otg_ss_clkctrl =
 			(*prcm)->cm_l3init_usb_otg_ss2_clkctrl;
diff --git a/arch/arm/cpu/armv7/omap5/prcm-regs.c b/arch/arm/cpu/armv7/omap5/prcm-regs.c
index 655e92b..b5f1d70 100644
--- a/arch/arm/cpu/armv7/omap5/prcm-regs.c
+++ b/arch/arm/cpu/armv7/omap5/prcm-regs.c
@@ -820,6 +820,7 @@
 	.cm_clkmode_dpll_gmac			= 0x4a0052a8,
 	.cm_coreaon_usb_phy1_core_clkctrl	= 0x4a008640,
 	.cm_coreaon_usb_phy2_core_clkctrl	= 0x4a008688,
+	.cm_coreaon_usb_phy3_core_clkctrl	= 0x4a008698,
 	.cm_coreaon_l3init_60m_gfclk_clkctrl	= 0x4a0086c0,
 
 	/* cm1.mpu */
diff --git a/arch/arm/include/asm/arch-omap5/clock.h b/arch/arm/include/asm/arch-omap5/clock.h
index 38d50d6..551c927 100644
--- a/arch/arm/include/asm/arch-omap5/clock.h
+++ b/arch/arm/include/asm/arch-omap5/clock.h
@@ -239,19 +239,22 @@
 #define VDD_MPU_ES2_LOW 880
 #define VDD_MM_ES2_LOW 880
 
-/* DRA74x/75x voltage settings in mv for OPP_NOM per DM */
-#define VDD_MPU_DRA752		1100
-#define VDD_EVE_DRA752		1060
-#define VDD_GPU_DRA752		1060
-#define VDD_CORE_DRA752		1060
-#define VDD_IVA_DRA752		1060
+/* DRA74x/75x/72x voltage settings in mv for OPP_NOM per DM */
+#define VDD_MPU_DRA7_NOM	1150
+#define VDD_CORE_DRA7_NOM	1150
+#define VDD_EVE_DRA7_NOM	1060
+#define VDD_GPU_DRA7_NOM	1060
+#define VDD_IVA_DRA7_NOM	1060
 
-/* DRA72x voltage settings in mv for OPP_NOM per DM */
-#define VDD_MPU_DRA72x		1100
-#define VDD_EVE_DRA72x		1060
-#define VDD_GPU_DRA72x		1060
-#define VDD_CORE_DRA72x		1060
-#define VDD_IVA_DRA72x		1060
+/* DRA74x/75x/72x voltage settings in mv for OPP_OD per DM */
+#define VDD_EVE_DRA7_OD		1150
+#define VDD_GPU_DRA7_OD		1150
+#define VDD_IVA_DRA7_OD		1150
+
+/* DRA74x/75x/72x voltage settings in mv for OPP_HIGH per DM */
+#define VDD_EVE_DRA7_HIGH	1250
+#define VDD_GPU_DRA7_HIGH	1250
+#define VDD_IVA_DRA7_HIGH	1250
 
 /* Efuse register offsets for DRA7xx platform */
 #define DRA752_EFUSE_BASE	0x4A002000
@@ -283,6 +286,20 @@
 /* STD_FUSE_OPP_VMIN_MPU_4 */
 #define STD_FUSE_OPP_VMIN_MPU_HIGH	(DRA752_EFUSE_BASE + 0x1B28)
 
+/* Common voltage and Efuse register macros */
+/* DRA74x/DRA75x/DRA72x */
+#define VDD_MPU_DRA7			VDD_MPU_DRA7_NOM
+#define VDD_CORE_DRA7			VDD_CORE_DRA7_NOM
+#define VDD_EVE_DRA7			VDD_EVE_DRA7_NOM
+#define VDD_GPU_DRA7			VDD_GPU_DRA7_NOM
+#define VDD_IVA_DRA7			VDD_IVA_DRA7_NOM
+
+#define STD_FUSE_OPP_VMIN_MPU		STD_FUSE_OPP_VMIN_MPU_NOM
+#define STD_FUSE_OPP_VMIN_CORE		STD_FUSE_OPP_VMIN_CORE_NOM
+#define STD_FUSE_OPP_VMIN_DSPEVE	STD_FUSE_OPP_VMIN_DSPEVE_NOM
+#define STD_FUSE_OPP_VMIN_GPU		STD_FUSE_OPP_VMIN_GPU_NOM
+#define STD_FUSE_OPP_VMIN_IVA		STD_FUSE_OPP_VMIN_IVA_NOM
+
 /* Standard offset is 0.5v expressed in uv */
 #define PALMAS_SMPS_BASE_VOLT_UV 500000
 
diff --git a/arch/arm/include/asm/arch-omap5/sys_proto.h b/arch/arm/include/asm/arch-omap5/sys_proto.h
index 804266a..ab0e7fa 100644
--- a/arch/arm/include/asm/arch-omap5/sys_proto.h
+++ b/arch/arm/include/asm/arch-omap5/sys_proto.h
@@ -51,6 +51,7 @@
 void setup_early_clocks(void);
 void prcm_init(void);
 void do_board_detect(void);
+void vcores_init(void);
 void bypass_dpll(u32 const base);
 void freq_update_core(void);
 u32 get_sys_clk_freq(void);
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 11b80fb..ae1e42f 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -15,6 +15,7 @@
  */
 
 #include <config.h>
+#include <asm/unified.h>
 
 /*
  * Endian independent macros for shifting bytes within registers.
diff --git a/arch/arm/include/asm/omap_common.h b/arch/arm/include/asm/omap_common.h
index ac34b0e..07f3848 100644
--- a/arch/arm/include/asm/omap_common.h
+++ b/arch/arm/include/asm/omap_common.h
@@ -145,6 +145,7 @@
 	u32 cm_ssc_modfreqdiv_dpll_unipro;
 	u32 cm_coreaon_usb_phy1_core_clkctrl;
 	u32 cm_coreaon_usb_phy2_core_clkctrl;
+	u32 cm_coreaon_usb_phy3_core_clkctrl;
 	u32 cm_coreaon_l3init_60m_gfclk_clkctrl;
 
 	/* cm2.core */
diff --git a/arch/arm/include/asm/unified.h b/arch/arm/include/asm/unified.h
new file mode 100644
index 0000000..1b26002
--- /dev/null
+++ b/arch/arm/include/asm/unified.h
@@ -0,0 +1,129 @@
+/*
+ * include/asm-arm/unified.h - Unified Assembler Syntax helper macros
+ *
+ * Copyright (C) 2008 ARM Limited
+ *
+ * SPDX-License-Identifier:	GPL-2.0
+ */
+
+#ifndef __ASM_UNIFIED_H
+#define __ASM_UNIFIED_H
+
+#if defined(__ASSEMBLY__) && defined(CONFIG_ARM_ASM_UNIFIED)
+	.syntax unified
+#endif
+
+#ifdef CONFIG_CPU_V7M
+#define AR_CLASS(x...)
+#define M_CLASS(x...)	x
+#else
+#define AR_CLASS(x...)	x
+#define M_CLASS(x...)
+#endif
+
+#ifdef CONFIG_THUMB2_KERNEL
+
+#if __GNUC__ < 4
+#error Thumb-2 kernel requires gcc >= 4
+#endif
+
+/* The CPSR bit describing the instruction set (Thumb) */
+#define PSR_ISETSTATE	PSR_T_BIT
+
+#define ARM(x...)
+#define THUMB(x...)	x
+#ifdef __ASSEMBLY__
+#define W(instr)	instr.w
+#else
+#define WASM(instr)	#instr ".w"
+#endif
+
+#else	/* !CONFIG_THUMB2_KERNEL */
+
+/* The CPSR bit describing the instruction set (ARM) */
+#define PSR_ISETSTATE	0
+
+#define ARM(x...)	x
+#define THUMB(x...)
+#ifdef __ASSEMBLY__
+#define W(instr)	instr
+#else
+#define WASM(instr)	#instr
+#endif
+
+#endif	/* CONFIG_THUMB2_KERNEL */
+
+#ifndef CONFIG_ARM_ASM_UNIFIED
+
+/*
+ * If the unified assembly syntax isn't used (in ARM mode), these
+ * macros expand to an empty string
+ */
+#ifdef __ASSEMBLY__
+	.macro	it, cond
+	.endm
+	.macro	itt, cond
+	.endm
+	.macro	ite, cond
+	.endm
+	.macro	ittt, cond
+	.endm
+	.macro	itte, cond
+	.endm
+	.macro	itet, cond
+	.endm
+	.macro	itee, cond
+	.endm
+	.macro	itttt, cond
+	.endm
+	.macro	ittte, cond
+	.endm
+	.macro	ittet, cond
+	.endm
+	.macro	ittee, cond
+	.endm
+	.macro	itett, cond
+	.endm
+	.macro	itete, cond
+	.endm
+	.macro	iteet, cond
+	.endm
+	.macro	iteee, cond
+	.endm
+#else	/* !__ASSEMBLY__ */
+__asm__(
+"	.macro	it, cond\n"
+"	.endm\n"
+"	.macro	itt, cond\n"
+"	.endm\n"
+"	.macro	ite, cond\n"
+"	.endm\n"
+"	.macro	ittt, cond\n"
+"	.endm\n"
+"	.macro	itte, cond\n"
+"	.endm\n"
+"	.macro	itet, cond\n"
+"	.endm\n"
+"	.macro	itee, cond\n"
+"	.endm\n"
+"	.macro	itttt, cond\n"
+"	.endm\n"
+"	.macro	ittte, cond\n"
+"	.endm\n"
+"	.macro	ittet, cond\n"
+"	.endm\n"
+"	.macro	ittee, cond\n"
+"	.endm\n"
+"	.macro	itett, cond\n"
+"	.endm\n"
+"	.macro	itete, cond\n"
+"	.endm\n"
+"	.macro	iteet, cond\n"
+"	.endm\n"
+"	.macro	iteee, cond\n"
+"	.endm\n");
+#endif	/* __ASSEMBLY__ */
+
+#endif	/* CONFIG_ARM_ASM_UNIFIED */
+
+#endif	/* !__ASM_UNIFIED_H */
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
index b535dbe..0e05e87 100644
--- a/arch/arm/lib/Makefile
+++ b/arch/arm/lib/Makefile
@@ -5,9 +5,9 @@
 # SPDX-License-Identifier:	GPL-2.0+
 #
 
-lib-$(CONFIG_USE_PRIVATE_LIBGCC) += _ashldi3.o _ashrdi3.o _divsi3.o \
-			_lshrdi3.o _modsi3.o _udivsi3.o _umodsi3.o div0.o \
-			_uldivmod.o
+lib-$(CONFIG_USE_PRIVATE_LIBGCC) += ashldi3.o ashrdi3.o lshrdi3.o \
+				    lib1funcs.o uldivmod.o div0.o \
+				    div64.o muldi3.o
 
 ifdef CONFIG_CPU_V7M
 obj-y	+= vectors_m.o crt0.o
@@ -62,9 +62,17 @@
 extra-y	+= eabi_compat.o
 endif
 
+asflags-y += -DCONFIG_ARM_ASM_UNIFIED
+ifeq ($(CONFIG_SPL_BUILD)$(CONFIG_TEGRA),yy)
+asflags-y += -D__LINUX_ARM_ARCH__=4
+else
+asflags-y += -D__LINUX_ARM_ARCH__=$(CONFIG_SYS_ARM_ARCH)
+endif
+
 # some files can only build in ARM or THUMB2, not THUMB1
 
 ifdef CONFIG_SYS_THUMB_BUILD
+asflags-$(CONFIG_HAS_THUMB2) += -DCONFIG_THUMB2_KERNEL
 ifndef CONFIG_HAS_THUMB2
 
 # for C files, just apend -marm, which will override previous -mthumb*
@@ -82,6 +90,5 @@
 AFLAGS_REMOVE_memcpy.o := -mthumb -mthumb-interwork
 AFLAGS_memset.o := -DMEMSET_NO_THUMB_BUILD
 AFLAGS_memcpy.o := -DMEMCPY_NO_THUMB_BUILD
-
 endif
 endif
diff --git a/arch/arm/lib/_divsi3.S b/arch/arm/lib/_divsi3.S
deleted file mode 100644
index c463c68..0000000
--- a/arch/arm/lib/_divsi3.S
+++ /dev/null
@@ -1,143 +0,0 @@
-#include <linux/linkage.h>
-
-.macro ARM_DIV_BODY dividend, divisor, result, curbit
-
-#if __LINUX_ARM_ARCH__ >= 5
-
-	clz	\curbit, \divisor
-	clz	\result, \dividend
-	sub	\result, \curbit, \result
-	mov	\curbit, #1
-	mov	\divisor, \divisor, lsl \result
-	mov	\curbit, \curbit, lsl \result
-	mov	\result, #0
-
-#else
-
-	@ Initially shift the divisor left 3 bits if possible,
-	@ set curbit accordingly.  This allows for curbit to be located
-	@ at the left end of each 4 bit nibbles in the division loop
-	@ to save one loop in most cases.
-	tst	\divisor, #0xe0000000
-	moveq	\divisor, \divisor, lsl #3
-	moveq	\curbit, #8
-	movne	\curbit, #1
-
-	@ Unless the divisor is very big, shift it up in multiples of
-	@ four bits, since this is the amount of unwinding in the main
-	@ division loop.  Continue shifting until the divisor is
-	@ larger than the dividend.
-1:	cmp	\divisor, #0x10000000
-	cmplo	\divisor, \dividend
-	movlo	\divisor, \divisor, lsl #4
-	movlo	\curbit, \curbit, lsl #4
-	blo	1b
-
-	@ For very big divisors, we must shift it a bit at a time, or
-	@ we will be in danger of overflowing.
-1:	cmp	\divisor, #0x80000000
-	cmplo	\divisor, \dividend
-	movlo	\divisor, \divisor, lsl #1
-	movlo	\curbit, \curbit, lsl #1
-	blo	1b
-
-	mov	\result, #0
-
-#endif
-
-	@ Division loop
-1:	cmp	\dividend, \divisor
-	subhs	\dividend, \dividend, \divisor
-	orrhs	\result,   \result,   \curbit
-	cmp	\dividend, \divisor,  lsr #1
-	subhs	\dividend, \dividend, \divisor, lsr #1
-	orrhs	\result,   \result,   \curbit,  lsr #1
-	cmp	\dividend, \divisor,  lsr #2
-	subhs	\dividend, \dividend, \divisor, lsr #2
-	orrhs	\result,   \result,   \curbit,  lsr #2
-	cmp	\dividend, \divisor,  lsr #3
-	subhs	\dividend, \dividend, \divisor, lsr #3
-	orrhs	\result,   \result,   \curbit,  lsr #3
-	cmp	\dividend, #0			@ Early termination?
-	movnes	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
-	movne	\divisor,  \divisor, lsr #4
-	bne	1b
-
-.endm
-
-.macro ARM_DIV2_ORDER divisor, order
-
-#if __LINUX_ARM_ARCH__ >= 5
-
-	clz	\order, \divisor
-	rsb	\order, \order, #31
-
-#else
-
-	cmp	\divisor, #(1 << 16)
-	movhs	\divisor, \divisor, lsr #16
-	movhs	\order, #16
-	movlo	\order, #0
-
-	cmp	\divisor, #(1 << 8)
-	movhs	\divisor, \divisor, lsr #8
-	addhs	\order, \order, #8
-
-	cmp	\divisor, #(1 << 4)
-	movhs	\divisor, \divisor, lsr #4
-	addhs	\order, \order, #4
-
-	cmp	\divisor, #(1 << 2)
-	addhi	\order, \order, #3
-	addls	\order, \order, \divisor, lsr #1
-
-#endif
-
-.endm
-
-	.align	5
-.globl __divsi3
-__divsi3:
-ENTRY(__aeabi_idiv)
-	cmp	r1, #0
-	eor	ip, r0, r1			@ save the sign of the result.
-	beq	Ldiv0
-	rsbmi	r1, r1, #0			@ loops below use unsigned.
-	subs	r2, r1, #1			@ division by 1 or -1 ?
-	beq	10f
-	movs	r3, r0
-	rsbmi	r3, r0, #0			@ positive dividend value
-	cmp	r3, r1
-	bls	11f
-	tst	r1, r2				@ divisor is power of 2 ?
-	beq	12f
-
-	ARM_DIV_BODY r3, r1, r0, r2
-
-	cmp	ip, #0
-	rsbmi	r0, r0, #0
-	mov	pc, lr
-
-10:	teq	ip, r0				@ same sign ?
-	rsbmi	r0, r0, #0
-	mov	pc, lr
-
-11:	movlo	r0, #0
-	moveq	r0, ip, asr #31
-	orreq	r0, r0, #1
-	mov	pc, lr
-
-12:	ARM_DIV2_ORDER r1, r2
-
-	cmp	ip, #0
-	mov	r0, r3, lsr r2
-	rsbmi	r0, r0, #0
-	mov	pc, lr
-
-Ldiv0:
-
-	str	lr, [sp, #-4]!
-	bl	__div0
-	mov	r0, #0			@ About as wrong as it could be.
-	ldr	pc, [sp], #4
-ENDPROC(__aeabi_idiv)
diff --git a/arch/arm/lib/_modsi3.S b/arch/arm/lib/_modsi3.S
deleted file mode 100644
index c5e1c22..0000000
--- a/arch/arm/lib/_modsi3.S
+++ /dev/null
@@ -1,99 +0,0 @@
-#include <linux/linkage.h>
-
-.macro ARM_MOD_BODY dividend, divisor, order, spare
-
-#if __LINUX_ARM_ARCH__ >= 5
-
-	clz	\order, \divisor
-	clz	\spare, \dividend
-	sub	\order, \order, \spare
-	mov	\divisor, \divisor, lsl \order
-
-#else
-
-	mov	\order, #0
-
-	@ Unless the divisor is very big, shift it up in multiples of
-	@ four bits, since this is the amount of unwinding in the main
-	@ division loop.  Continue shifting until the divisor is
-	@ larger than the dividend.
-1:	cmp	\divisor, #0x10000000
-	cmplo	\divisor, \dividend
-	movlo	\divisor, \divisor, lsl #4
-	addlo	\order, \order, #4
-	blo	1b
-
-	@ For very big divisors, we must shift it a bit at a time, or
-	@ we will be in danger of overflowing.
-1:	cmp	\divisor, #0x80000000
-	cmplo	\divisor, \dividend
-	movlo	\divisor, \divisor, lsl #1
-	addlo	\order, \order, #1
-	blo	1b
-
-#endif
-
-	@ Perform all needed substractions to keep only the reminder.
-	@ Do comparisons in batch of 4 first.
-	subs	\order, \order, #3		@ yes, 3 is intended here
-	blt	2f
-
-1:	cmp	\dividend, \divisor
-	subhs	\dividend, \dividend, \divisor
-	cmp	\dividend, \divisor,  lsr #1
-	subhs	\dividend, \dividend, \divisor, lsr #1
-	cmp	\dividend, \divisor,  lsr #2
-	subhs	\dividend, \dividend, \divisor, lsr #2
-	cmp	\dividend, \divisor,  lsr #3
-	subhs	\dividend, \dividend, \divisor, lsr #3
-	cmp	\dividend, #1
-	mov	\divisor, \divisor, lsr #4
-	subges	\order, \order, #4
-	bge	1b
-
-	tst	\order, #3
-	teqne	\dividend, #0
-	beq	5f
-
-	@ Either 1, 2 or 3 comparison/substractions are left.
-2:	cmn	\order, #2
-	blt	4f
-	beq	3f
-	cmp	\dividend, \divisor
-	subhs	\dividend, \dividend, \divisor
-	mov	\divisor,  \divisor,  lsr #1
-3:	cmp	\dividend, \divisor
-	subhs	\dividend, \dividend, \divisor
-	mov	\divisor,  \divisor,  lsr #1
-4:	cmp	\dividend, \divisor
-	subhs	\dividend, \dividend, \divisor
-5:
-.endm
-
-	.align	5
-ENTRY(__modsi3)
-	cmp	r1, #0
-	beq	Ldiv0
-	rsbmi	r1, r1, #0			@ loops below use unsigned.
-	movs	ip, r0				@ preserve sign of dividend
-	rsbmi	r0, r0, #0			@ if negative make positive
-	subs	r2, r1, #1			@ compare divisor with 1
-	cmpne	r0, r1				@ compare dividend with divisor
-	moveq	r0, #0
-	tsthi	r1, r2				@ see if divisor is power of 2
-	andeq	r0, r0, r2
-	bls	10f
-
-	ARM_MOD_BODY r0, r1, r2, r3
-
-10:	cmp	ip, #0
-	rsbmi	r0, r0, #0
-	mov	pc, lr
-ENDPROC(__modsi3)
-
-Ldiv0:
-
-	str	lr, [sp, #-4]!
-	bl	__div0
-	mov	r0, #0			@ About as wrong as it could be.
-	ldr	pc, [sp], #4
diff --git a/arch/arm/lib/_udivsi3.S b/arch/arm/lib/_udivsi3.S
deleted file mode 100644
index 3b653be..0000000
--- a/arch/arm/lib/_udivsi3.S
+++ /dev/null
@@ -1,95 +0,0 @@
-#include <linux/linkage.h>
-
-/* # 1 "libgcc1.S" */
-@ libgcc1 routines for ARM cpu.
-@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
-dividend	.req	r0
-divisor		.req	r1
-result		.req	r2
-curbit		.req	r3
-/* ip		.req	r12	*/
-/* sp		.req	r13	*/
-/* lr		.req	r14	*/
-/* pc		.req	r15	*/
-	.text
-	.globl	 __udivsi3
-	.type	__udivsi3 ,function
-	.globl	__aeabi_uidiv
-	.type	__aeabi_uidiv ,function
-	.align	0
- __udivsi3:
- __aeabi_uidiv:
-	cmp	divisor, #0
-	beq	Ldiv0
-	mov	curbit, #1
-	mov	result, #0
-	cmp	dividend, divisor
-	bcc	Lgot_result
-Loop1:
-	@ Unless the divisor is very big, shift it up in multiples of
-	@ four bits, since this is the amount of unwinding in the main
-	@ division loop.  Continue shifting until the divisor is
-	@ larger than the dividend.
-	cmp	divisor, #0x10000000
-	cmpcc	divisor, dividend
-	movcc	divisor, divisor, lsl #4
-	movcc	curbit, curbit, lsl #4
-	bcc	Loop1
-Lbignum:
-	@ For very big divisors, we must shift it a bit at a time, or
-	@ we will be in danger of overflowing.
-	cmp	divisor, #0x80000000
-	cmpcc	divisor, dividend
-	movcc	divisor, divisor, lsl #1
-	movcc	curbit, curbit, lsl #1
-	bcc	Lbignum
-Loop3:
-	@ Test for possible subtractions, and note which bits
-	@ are done in the result.  On the final pass, this may subtract
-	@ too much from the dividend, but the result will be ok, since the
-	@ "bit" will have been shifted out at the bottom.
-	cmp	dividend, divisor
-	subcs	dividend, dividend, divisor
-	orrcs	result, result, curbit
-	cmp	dividend, divisor, lsr #1
-	subcs	dividend, dividend, divisor, lsr #1
-	orrcs	result, result, curbit, lsr #1
-	cmp	dividend, divisor, lsr #2
-	subcs	dividend, dividend, divisor, lsr #2
-	orrcs	result, result, curbit, lsr #2
-	cmp	dividend, divisor, lsr #3
-	subcs	dividend, dividend, divisor, lsr #3
-	orrcs	result, result, curbit, lsr #3
-	cmp	dividend, #0			@ Early termination?
-	movnes	curbit, curbit, lsr #4		@ No, any more bits to do?
-	movne	divisor, divisor, lsr #4
-	bne	Loop3
-Lgot_result:
-	mov	r0, result
-	mov	pc, lr
-Ldiv0:
-	str	lr, [sp, #-4]!
-	bl	 __div0       (PLT)
-	mov	r0, #0			@ about as wrong as it could be
-	ldmia	sp!, {pc}
-	.size  __udivsi3       , . -  __udivsi3
-
-ENTRY(__aeabi_uidivmod)
-
-	stmfd	sp!, {r0, r1, ip, lr}
-	bl	__aeabi_uidiv
-	ldmfd	sp!, {r1, r2, ip, lr}
-	mul	r3, r0, r2
-	sub	r1, r1, r3
-	mov	pc, lr
-ENDPROC(__aeabi_uidivmod)
-
-ENTRY(__aeabi_idivmod)
-
-	stmfd	sp!, {r0, r1, ip, lr}
-	bl	__aeabi_idiv
-	ldmfd	sp!, {r1, r2, ip, lr}
-	mul	r3, r0, r2
-	sub	r1, r1, r3
-	mov	pc, lr
-ENDPROC(__aeabi_idivmod)
diff --git a/arch/arm/lib/_umodsi3.S b/arch/arm/lib/_umodsi3.S
deleted file mode 100644
index b166737..0000000
--- a/arch/arm/lib/_umodsi3.S
+++ /dev/null
@@ -1,90 +0,0 @@
-#include <linux/linkage.h>
-
-/* # 1 "libgcc1.S" */
-@ libgcc1 routines for ARM cpu.
-@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
-/* # 145 "libgcc1.S" */
-dividend	.req	r0
-divisor		.req	r1
-overdone	.req	r2
-curbit		.req	r3
-/* ip		.req	r12	*/
-/* sp		.req	r13	*/
-/* lr		.req	r14	*/
-/* pc		.req	r15	*/
-	.text
-	.type  __umodsi3       ,function
-	.align 0
- ENTRY(__umodsi3)
-	cmp	divisor, #0
-	beq	Ldiv0
-	mov	curbit, #1
-	cmp	dividend, divisor
-	movcc	pc, lr
-Loop1:
-	@ Unless the divisor is very big, shift it up in multiples of
-	@ four bits, since this is the amount of unwinding in the main
-	@ division loop.  Continue shifting until the divisor is
-	@ larger than the dividend.
-	cmp	divisor, #0x10000000
-	cmpcc	divisor, dividend
-	movcc	divisor, divisor, lsl #4
-	movcc	curbit, curbit, lsl #4
-	bcc	Loop1
-Lbignum:
-	@ For very big divisors, we must shift it a bit at a time, or
-	@ we will be in danger of overflowing.
-	cmp	divisor, #0x80000000
-	cmpcc	divisor, dividend
-	movcc	divisor, divisor, lsl #1
-	movcc	curbit, curbit, lsl #1
-	bcc	Lbignum
-Loop3:
-	@ Test for possible subtractions.  On the final pass, this may
-	@ subtract too much from the dividend, so keep track of which
-	@ subtractions are done, we can fix them up afterwards...
-	mov	overdone, #0
-	cmp	dividend, divisor
-	subcs	dividend, dividend, divisor
-	cmp	dividend, divisor, lsr #1
-	subcs	dividend, dividend, divisor, lsr #1
-	orrcs	overdone, overdone, curbit, ror #1
-	cmp	dividend, divisor, lsr #2
-	subcs	dividend, dividend, divisor, lsr #2
-	orrcs	overdone, overdone, curbit, ror #2
-	cmp	dividend, divisor, lsr #3
-	subcs	dividend, dividend, divisor, lsr #3
-	orrcs	overdone, overdone, curbit, ror #3
-	mov	ip, curbit
-	cmp	dividend, #0			@ Early termination?
-	movnes	curbit, curbit, lsr #4		@ No, any more bits to do?
-	movne	divisor, divisor, lsr #4
-	bne	Loop3
-	@ Any subtractions that we should not have done will be recorded in
-	@ the top three bits of "overdone".  Exactly which were not needed
-	@ are governed by the position of the bit, stored in ip.
-	@ If we terminated early, because dividend became zero,
-	@ then none of the below will match, since the bit in ip will not be
-	@ in the bottom nibble.
-	ands	overdone, overdone, #0xe0000000
-	moveq	pc, lr				@ No fixups needed
-	tst	overdone, ip, ror #3
-	addne	dividend, dividend, divisor, lsr #3
-	tst	overdone, ip, ror #2
-	addne	dividend, dividend, divisor, lsr #2
-	tst	overdone, ip, ror #1
-	addne	dividend, dividend, divisor, lsr #1
-	mov	pc, lr
-Ldiv0:
-	str	lr, [sp, #-4]!
-	bl	 __div0       (PLT)
-	mov	r0, #0			@ about as wrong as it could be
-	ldmia	sp!, {pc}
-	.size  __umodsi3       , . -  __umodsi3
-/* # 320 "libgcc1.S" */
-/* # 421 "libgcc1.S" */
-/* # 433 "libgcc1.S" */
-/* # 456 "libgcc1.S" */
-/* # 500 "libgcc1.S" */
-/* # 580 "libgcc1.S" */
-ENDPROC(__umodsi3)
diff --git a/arch/arm/lib/_ashldi3.S b/arch/arm/lib/ashldi3.S
similarity index 65%
rename from arch/arm/lib/_ashldi3.S
rename to arch/arm/lib/ashldi3.S
index 9c34c21..6c9ae91 100644
--- a/arch/arm/lib/_ashldi3.S
+++ b/arch/arm/lib/ashldi3.S
@@ -5,6 +5,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 
 #ifdef __ARMEB__
 #define al r1
@@ -14,15 +15,20 @@
 #define ah r1
 #endif
 
-.globl __ashldi3
-__ashldi3:
+ENTRY(__ashldi3)
 ENTRY(__aeabi_llsl)
+.pushsection .text.__ashldi3, "ax"
 
 	subs	r3, r2, #32
 	rsb	ip, r2, #32
 	movmi	ah, ah, lsl r2
 	movpl	ah, al, lsl r3
-	orrmi	ah, ah, al, lsr ip
+ ARM(	orrmi	ah, ah, al, lsr ip	)
+ THUMB(	lsrmi	r3, al, ip		)
+ THUMB(	orrmi	ah, ah, r3		)
 	mov	al, al, lsl r2
-	mov	pc, lr
+	ret	lr
+
+.popsection
+ENDPROC(__ashldi3)
 ENDPROC(__aeabi_llsl)
diff --git a/arch/arm/lib/_ashrdi3.S b/arch/arm/lib/ashrdi3.S
similarity index 65%
rename from arch/arm/lib/_ashrdi3.S
rename to arch/arm/lib/ashrdi3.S
index c74fd64..3eb59ec 100644
--- a/arch/arm/lib/_ashrdi3.S
+++ b/arch/arm/lib/ashrdi3.S
@@ -5,6 +5,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 
 #ifdef __ARMEB__
 #define al r1
@@ -14,15 +15,20 @@
 #define ah r1
 #endif
 
-.globl __ashrdi3
-__ashrdi3:
+ENTRY(__ashrdi3)
 ENTRY(__aeabi_lasr)
+.pushsection .text.__ashrdi3, "ax"
 
 	subs	r3, r2, #32
 	rsb	ip, r2, #32
 	movmi	al, al, lsr r2
 	movpl	al, ah, asr r3
-	orrmi	al, al, ah, lsl ip
+ ARM(	orrmi	al, al, ah, lsl ip	)
+ THUMB(	lslmi	r3, ah, ip		)
+ THUMB(	orrmi	al, al, r3		)
 	mov	ah, ah, asr r2
-	mov	pc, lr
+	ret	lr
+
+.popsection
+ENDPROC(__ashrdi3)
 ENDPROC(__aeabi_lasr)
diff --git a/arch/arm/lib/div64.S b/arch/arm/lib/div64.S
new file mode 100644
index 0000000..5bfb41d
--- /dev/null
+++ b/arch/arm/lib/div64.S
@@ -0,0 +1,214 @@
+/*
+ *  linux/arch/arm/lib/div64.S
+ *
+ *  Optimized computation of 64-bit dividend / 32-bit divisor
+ *
+ *  Author:	Nicolas Pitre
+ *  Created:	Oct 5, 2003
+ *  Copyright:	Monta Vista Software, Inc.
+ *
+ *  SPDX-License-Identifier:	GPL-2.0
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#ifdef __UBOOT__
+#define UNWIND(x...)
+#endif
+
+#ifdef __ARMEB__
+#define xh r0
+#define xl r1
+#define yh r2
+#define yl r3
+#else
+#define xl r0
+#define xh r1
+#define yl r2
+#define yh r3
+#endif
+
+/*
+ * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
+ *
+ * Note: Calling convention is totally non standard for optimal code.
+ *       This is meant to be used by do_div() from include/asm/div64.h only.
+ *
+ * Input parameters:
+ * 	xh-xl	= dividend (clobbered)
+ * 	r4	= divisor (preserved)
+ *
+ * Output values:
+ * 	yh-yl	= result
+ * 	xh	= remainder
+ *
+ * Clobbered regs: xl, ip
+ */
+
+ENTRY(__do_div64)
+UNWIND(.fnstart)
+.pushsection .text.__do_div64, "ax"
+
+	@ Test for easy paths first.
+	subs	ip, r4, #1
+	bls	9f			@ divisor is 0 or 1
+	tst	ip, r4
+	beq	8f			@ divisor is power of 2
+
+	@ See if we need to handle upper 32-bit result.
+	cmp	xh, r4
+	mov	yh, #0
+	blo	3f
+
+	@ Align divisor with upper part of dividend.
+	@ The aligned divisor is stored in yl preserving the original.
+	@ The bit position is stored in ip.
+
+#if __LINUX_ARM_ARCH__ >= 5
+
+	clz	yl, r4
+	clz	ip, xh
+	sub	yl, yl, ip
+	mov	ip, #1
+	mov	ip, ip, lsl yl
+	mov	yl, r4, lsl yl
+
+#else
+
+	mov	yl, r4
+	mov	ip, #1
+1:	cmp	yl, #0x80000000
+	cmpcc	yl, xh
+	movcc	yl, yl, lsl #1
+	movcc	ip, ip, lsl #1
+	bcc	1b
+
+#endif
+
+	@ The division loop for needed upper bit positions.
+ 	@ Break out early if dividend reaches 0.
+2:	cmp	xh, yl
+	orrcs	yh, yh, ip
+	subscs	xh, xh, yl
+	movsne	ip, ip, lsr #1
+	mov	yl, yl, lsr #1
+	bne	2b
+
+	@ See if we need to handle lower 32-bit result.
+3:	cmp	xh, #0
+	mov	yl, #0
+	cmpeq	xl, r4
+	movlo	xh, xl
+	retlo	lr
+
+	@ The division loop for lower bit positions.
+	@ Here we shift remainer bits leftwards rather than moving the
+	@ divisor for comparisons, considering the carry-out bit as well.
+	mov	ip, #0x80000000
+4:	movs	xl, xl, lsl #1
+	adcs	xh, xh, xh
+	beq	6f
+	cmpcc	xh, r4
+5:	orrcs	yl, yl, ip
+	subcs	xh, xh, r4
+	movs	ip, ip, lsr #1
+	bne	4b
+	ret	lr
+
+	@ The top part of remainder became zero.  If carry is set
+	@ (the 33th bit) this is a false positive so resume the loop.
+	@ Otherwise, if lower part is also null then we are done.
+6:	bcs	5b
+	cmp	xl, #0
+	reteq	lr
+
+	@ We still have remainer bits in the low part.  Bring them up.
+
+#if __LINUX_ARM_ARCH__ >= 5
+
+	clz	xh, xl			@ we know xh is zero here so...
+	add	xh, xh, #1
+	mov	xl, xl, lsl xh
+	mov	ip, ip, lsr xh
+
+#else
+
+7:	movs	xl, xl, lsl #1
+	mov	ip, ip, lsr #1
+	bcc	7b
+
+#endif
+
+	@ Current remainder is now 1.  It is worthless to compare with
+	@ divisor at this point since divisor can not be smaller than 3 here.
+	@ If possible, branch for another shift in the division loop.
+	@ If no bit position left then we are done.
+	movs	ip, ip, lsr #1
+	mov	xh, #1
+	bne	4b
+	ret	lr
+
+8:	@ Division by a power of 2: determine what that divisor order is
+	@ then simply shift values around
+
+#if __LINUX_ARM_ARCH__ >= 5
+
+	clz	ip, r4
+	rsb	ip, ip, #31
+
+#else
+
+	mov	yl, r4
+	cmp	r4, #(1 << 16)
+	mov	ip, #0
+	movhs	yl, yl, lsr #16
+	movhs	ip, #16
+
+	cmp	yl, #(1 << 8)
+	movhs	yl, yl, lsr #8
+	addhs	ip, ip, #8
+
+	cmp	yl, #(1 << 4)
+	movhs	yl, yl, lsr #4
+	addhs	ip, ip, #4
+
+	cmp	yl, #(1 << 2)
+	addhi	ip, ip, #3
+	addls	ip, ip, yl, lsr #1
+
+#endif
+
+	mov	yh, xh, lsr ip
+	mov	yl, xl, lsr ip
+	rsb	ip, ip, #32
+ ARM(	orr	yl, yl, xh, lsl ip	)
+ THUMB(	lsl	xh, xh, ip		)
+ THUMB(	orr	yl, yl, xh		)
+	mov	xh, xl, lsl ip
+	mov	xh, xh, lsr ip
+	ret	lr
+
+	@ eq -> division by 1: obvious enough...
+9:	moveq	yl, xl
+	moveq	yh, xh
+	moveq	xh, #0
+	reteq	lr
+.popsection
+UNWIND(.fnend)
+
+UNWIND(.fnstart)
+UNWIND(.pad #4)
+UNWIND(.save {lr})
+Ldiv0_64:
+	@ Division by 0:
+	str	lr, [sp, #-8]!
+	bl	__div0
+
+	@ as wrong as it could be...
+	mov	yl, #0
+	mov	yh, #0
+	mov	xh, #0
+	ldr	pc, [sp], #8
+
+UNWIND(.fnend)
+ENDPROC(__do_div64)
diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S
new file mode 100644
index 0000000..f1becda
--- /dev/null
+++ b/arch/arm/lib/lib1funcs.S
@@ -0,0 +1,429 @@
+/*
+ * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
+ *
+ * Author: Nicolas Pitre <nico@fluxnic.net>
+ *   - contributed to gcc-3.4 on Sep 30, 2003
+ *   - adapted for the Linux kernel on Oct 2, 2003
+ */
+
+/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
+
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+/*
+ * U-Boot compatibility bit, define empty UNWIND() macro as, since we
+ * do not support stack unwinding and define CONFIG_AEABI to make all
+ * of the functions available without diverging from Linux code.
+ */
+#ifdef __UBOOT__
+#define UNWIND(x...)
+#define CONFIG_AEABI
+#endif
+
+.macro ARM_DIV_BODY dividend, divisor, result, curbit
+
+#if __LINUX_ARM_ARCH__ >= 5
+
+	clz	\curbit, \divisor
+	clz	\result, \dividend
+	sub	\result, \curbit, \result
+	mov	\curbit, #1
+	mov	\divisor, \divisor, lsl \result
+	mov	\curbit, \curbit, lsl \result
+	mov	\result, #0
+	
+#else
+
+	@ Initially shift the divisor left 3 bits if possible,
+	@ set curbit accordingly.  This allows for curbit to be located
+	@ at the left end of each 4 bit nibbles in the division loop
+	@ to save one loop in most cases.
+	tst	\divisor, #0xe0000000
+	moveq	\divisor, \divisor, lsl #3
+	moveq	\curbit, #8
+	movne	\curbit, #1
+
+	@ Unless the divisor is very big, shift it up in multiples of
+	@ four bits, since this is the amount of unwinding in the main
+	@ division loop.  Continue shifting until the divisor is 
+	@ larger than the dividend.
+1:	cmp	\divisor, #0x10000000
+	cmplo	\divisor, \dividend
+	movlo	\divisor, \divisor, lsl #4
+	movlo	\curbit, \curbit, lsl #4
+	blo	1b
+
+	@ For very big divisors, we must shift it a bit at a time, or
+	@ we will be in danger of overflowing.
+1:	cmp	\divisor, #0x80000000
+	cmplo	\divisor, \dividend
+	movlo	\divisor, \divisor, lsl #1
+	movlo	\curbit, \curbit, lsl #1
+	blo	1b
+
+	mov	\result, #0
+
+#endif
+
+	@ Division loop
+1:	cmp	\dividend, \divisor
+	subhs	\dividend, \dividend, \divisor
+	orrhs	\result,   \result,   \curbit
+	cmp	\dividend, \divisor,  lsr #1
+	subhs	\dividend, \dividend, \divisor, lsr #1
+	orrhs	\result,   \result,   \curbit,  lsr #1
+	cmp	\dividend, \divisor,  lsr #2
+	subhs	\dividend, \dividend, \divisor, lsr #2
+	orrhs	\result,   \result,   \curbit,  lsr #2
+	cmp	\dividend, \divisor,  lsr #3
+	subhs	\dividend, \dividend, \divisor, lsr #3
+	orrhs	\result,   \result,   \curbit,  lsr #3
+	cmp	\dividend, #0			@ Early termination?
+	movsne	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
+	movne	\divisor,  \divisor, lsr #4
+	bne	1b
+
+.endm
+
+
+.macro ARM_DIV2_ORDER divisor, order
+
+#if __LINUX_ARM_ARCH__ >= 5
+
+	clz	\order, \divisor
+	rsb	\order, \order, #31
+
+#else
+
+	cmp	\divisor, #(1 << 16)
+	movhs	\divisor, \divisor, lsr #16
+	movhs	\order, #16
+	movlo	\order, #0
+
+	cmp	\divisor, #(1 << 8)
+	movhs	\divisor, \divisor, lsr #8
+	addhs	\order, \order, #8
+
+	cmp	\divisor, #(1 << 4)
+	movhs	\divisor, \divisor, lsr #4
+	addhs	\order, \order, #4
+
+	cmp	\divisor, #(1 << 2)
+	addhi	\order, \order, #3
+	addls	\order, \order, \divisor, lsr #1
+
+#endif
+
+.endm
+
+
+.macro ARM_MOD_BODY dividend, divisor, order, spare
+
+#if __LINUX_ARM_ARCH__ >= 5
+
+	clz	\order, \divisor
+	clz	\spare, \dividend
+	sub	\order, \order, \spare
+	mov	\divisor, \divisor, lsl \order
+
+#else
+
+	mov	\order, #0
+
+	@ Unless the divisor is very big, shift it up in multiples of
+	@ four bits, since this is the amount of unwinding in the main
+	@ division loop.  Continue shifting until the divisor is 
+	@ larger than the dividend.
+1:	cmp	\divisor, #0x10000000
+	cmplo	\divisor, \dividend
+	movlo	\divisor, \divisor, lsl #4
+	addlo	\order, \order, #4
+	blo	1b
+
+	@ For very big divisors, we must shift it a bit at a time, or
+	@ we will be in danger of overflowing.
+1:	cmp	\divisor, #0x80000000
+	cmplo	\divisor, \dividend
+	movlo	\divisor, \divisor, lsl #1
+	addlo	\order, \order, #1
+	blo	1b
+
+#endif
+
+	@ Perform all needed subtractions to keep only the reminder.
+	@ Do comparisons in batch of 4 first.
+	subs	\order, \order, #3		@ yes, 3 is intended here
+	blt	2f
+
+1:	cmp	\dividend, \divisor
+	subhs	\dividend, \dividend, \divisor
+	cmp	\dividend, \divisor,  lsr #1
+	subhs	\dividend, \dividend, \divisor, lsr #1
+	cmp	\dividend, \divisor,  lsr #2
+	subhs	\dividend, \dividend, \divisor, lsr #2
+	cmp	\dividend, \divisor,  lsr #3
+	subhs	\dividend, \dividend, \divisor, lsr #3
+	cmp	\dividend, #1
+	mov	\divisor, \divisor, lsr #4
+	subsge	\order, \order, #4
+	bge	1b
+
+	tst	\order, #3
+	teqne	\dividend, #0
+	beq	5f
+
+	@ Either 1, 2 or 3 comparison/subtractions are left.
+2:	cmn	\order, #2
+	blt	4f
+	beq	3f
+	cmp	\dividend, \divisor
+	subhs	\dividend, \dividend, \divisor
+	mov	\divisor,  \divisor,  lsr #1
+3:	cmp	\dividend, \divisor
+	subhs	\dividend, \dividend, \divisor
+	mov	\divisor,  \divisor,  lsr #1
+4:	cmp	\dividend, \divisor
+	subhs	\dividend, \dividend, \divisor
+5:
+.endm
+
+
+ENTRY(__udivsi3)
+ENTRY(__aeabi_uidiv)
+UNWIND(.fnstart)
+.pushsection .text.__udivsi3, "ax"
+
+	subs	r2, r1, #1
+	reteq	lr
+	bcc	Ldiv0
+	cmp	r0, r1
+	bls	11f
+	tst	r1, r2
+	beq	12f
+
+	ARM_DIV_BODY r0, r1, r2, r3
+
+	mov	r0, r2
+	ret	lr
+
+11:	moveq	r0, #1
+	movne	r0, #0
+	ret	lr
+
+12:	ARM_DIV2_ORDER r1, r2
+
+	mov	r0, r0, lsr r2
+	ret	lr
+
+.popsection
+UNWIND(.fnend)
+ENDPROC(__udivsi3)
+ENDPROC(__aeabi_uidiv)
+
+ENTRY(__umodsi3)
+UNWIND(.fnstart)
+.pushsection .text.__umodsi3, "ax"
+
+	subs	r2, r1, #1			@ compare divisor with 1
+	bcc	Ldiv0
+	cmpne	r0, r1				@ compare dividend with divisor
+	moveq   r0, #0
+	tsthi	r1, r2				@ see if divisor is power of 2
+	andeq	r0, r0, r2
+	retls	lr
+
+	ARM_MOD_BODY r0, r1, r2, r3
+
+	ret	lr
+
+.popsection
+UNWIND(.fnend)
+ENDPROC(__umodsi3)
+
+ENTRY(__divsi3)
+ENTRY(__aeabi_idiv)
+UNWIND(.fnstart)
+.pushsection .text.__divsi3, "ax"
+
+	cmp	r1, #0
+	eor	ip, r0, r1			@ save the sign of the result.
+	beq	Ldiv0
+	rsbmi	r1, r1, #0			@ loops below use unsigned.
+	subs	r2, r1, #1			@ division by 1 or -1 ?
+	beq	10f
+	movs	r3, r0
+	rsbmi	r3, r0, #0			@ positive dividend value
+	cmp	r3, r1
+	bls	11f
+	tst	r1, r2				@ divisor is power of 2 ?
+	beq	12f
+
+	ARM_DIV_BODY r3, r1, r0, r2
+
+	cmp	ip, #0
+	rsbmi	r0, r0, #0
+	ret	lr
+
+10:	teq	ip, r0				@ same sign ?
+	rsbmi	r0, r0, #0
+	ret	lr
+
+11:	movlo	r0, #0
+	moveq	r0, ip, asr #31
+	orreq	r0, r0, #1
+	ret	lr
+
+12:	ARM_DIV2_ORDER r1, r2
+
+	cmp	ip, #0
+	mov	r0, r3, lsr r2
+	rsbmi	r0, r0, #0
+	ret	lr
+
+.popsection
+UNWIND(.fnend)
+ENDPROC(__divsi3)
+ENDPROC(__aeabi_idiv)
+
+ENTRY(__modsi3)
+UNWIND(.fnstart)
+.pushsection .text.__modsi3, "ax"
+
+	cmp	r1, #0
+	beq	Ldiv0
+	rsbmi	r1, r1, #0			@ loops below use unsigned.
+	movs	ip, r0				@ preserve sign of dividend
+	rsbmi	r0, r0, #0			@ if negative make positive
+	subs	r2, r1, #1			@ compare divisor with 1
+	cmpne	r0, r1				@ compare dividend with divisor
+	moveq	r0, #0
+	tsthi	r1, r2				@ see if divisor is power of 2
+	andeq	r0, r0, r2
+	bls	10f
+
+	ARM_MOD_BODY r0, r1, r2, r3
+
+10:	cmp	ip, #0
+	rsbmi	r0, r0, #0
+	ret	lr
+
+.popsection
+UNWIND(.fnend)
+ENDPROC(__modsi3)
+
+#ifdef CONFIG_AEABI
+
+ENTRY(__aeabi_uidivmod)
+UNWIND(.fnstart)
+UNWIND(.save {r0, r1, ip, lr}	)
+.pushsection .text.__aeabi_uidivmod, "ax"
+
+	stmfd	sp!, {r0, r1, ip, lr}
+	bl	__aeabi_uidiv
+	ldmfd	sp!, {r1, r2, ip, lr}
+	mul	r3, r0, r2
+	sub	r1, r1, r3
+	ret	lr
+
+.popsection
+UNWIND(.fnend)
+ENDPROC(__aeabi_uidivmod)
+
+ENTRY(__aeabi_idivmod)
+UNWIND(.fnstart)
+UNWIND(.save {r0, r1, ip, lr}	)
+.pushsection .text.__aeabi_uidivmod, "ax"
+
+	stmfd	sp!, {r0, r1, ip, lr}
+	bl	__aeabi_idiv
+	ldmfd	sp!, {r1, r2, ip, lr}
+	mul	r3, r0, r2
+	sub	r1, r1, r3
+	ret	lr
+
+.popsection
+UNWIND(.fnend)
+ENDPROC(__aeabi_idivmod)
+
+#endif
+
+Ldiv0:
+UNWIND(.fnstart)
+UNWIND(.pad #4)
+UNWIND(.save {lr})
+.pushsection .text.Ldiv0, "ax"
+
+	str	lr, [sp, #-8]!
+	bl	__div0
+	mov	r0, #0			@ About as wrong as it could be.
+	ldr	pc, [sp], #8
+
+.popsection
+UNWIND(.fnend)
+ENDPROC(Ldiv0)
+
+/* Thumb-1 specialities */
+#if defined(CONFIG_SYS_THUMB_BUILD) && !defined(CONFIG_HAS_THUMB2)
+ENTRY(__gnu_thumb1_case_sqi)
+.pushsection .text.__gnu_thumb1_case_sqi, "ax"
+	push	{r1}
+	mov	r1, lr
+	lsrs	r1, r1, #1
+	lsls	r1, r1, #1
+	ldrsb	r1, [r1, r0]
+	lsls	r1, r1, #1
+	add	lr, lr, r1
+	pop	{r1}
+	bx	lr
+.popsection
+ENDPROC(__gnu_thumb1_case_sqi)
+
+ENTRY(__gnu_thumb1_case_uqi)
+.pushsection .text.__gnu_thumb1_case_uqi, "ax"
+	push	{r1}
+	mov	r1, lr
+	lsrs	r1, r1, #1
+	lsls	r1, r1, #1
+	ldrb	r1, [r1, r0]
+	lsls	r1, r1, #1
+	add	lr, lr, r1
+	pop	{r1}
+	bx	lr
+.popsection
+ENDPROC(__gnu_thumb1_case_uqi)
+
+ENTRY(__gnu_thumb1_case_shi)
+.pushsection .text.__gnu_thumb1_case_shi, "ax"
+	push	{r0, r1}
+	mov	r1, lr
+	lsrs	r1, r1, #1
+	lsls	r0, r0, #1
+	lsls	r1, r1, #1
+	ldrsh	r1, [r1, r0]
+	lsls	r1, r1, #1
+	add	lr, lr, r1
+	pop	{r0, r1}
+	bx	lr
+.popsection
+ENDPROC(__gnu_thumb1_case_shi)
+
+ENTRY(__gnu_thumb1_case_uhi)
+.pushsection .text.__gnu_thumb1_case_uhi, "ax"
+	push	{r0, r1}
+	mov	r1, lr
+	lsrs	r1, r1, #1
+	lsls	r0, r0, #1
+	lsls	r1, r1, #1
+	ldrh	r1, [r1, r0]
+	lsls	r1, r1, #1
+	add	lr, lr, r1
+	pop	{r0, r1}
+	bx	lr
+.popsection
+ENDPROC(__gnu_thumb1_case_uhi)
+#endif
diff --git a/arch/arm/lib/_lshrdi3.S b/arch/arm/lib/lshrdi3.S
similarity index 65%
rename from arch/arm/lib/_lshrdi3.S
rename to arch/arm/lib/lshrdi3.S
index 1f9b916..f710ccb 100644
--- a/arch/arm/lib/_lshrdi3.S
+++ b/arch/arm/lib/lshrdi3.S
@@ -5,6 +5,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 
 #ifdef __ARMEB__
 #define al r1
@@ -14,15 +15,20 @@
 #define ah r1
 #endif
 
-.globl __lshrdi3
-__lshrdi3:
+ENTRY(__lshrdi3)
 ENTRY(__aeabi_llsr)
+.pushsection .text.__lshldi3, "ax"
 
 	subs	r3, r2, #32
 	rsb	ip, r2, #32
 	movmi	al, al, lsr r2
 	movpl	al, ah, lsr r3
-	orrmi	al, al, ah, lsl ip
+ ARM(	orrmi	al, al, ah, lsl ip	)
+ THUMB(	lslmi	r3, ah, ip		)
+ THUMB(	orrmi	al, al, r3		)
 	mov	ah, ah, lsr r2
-	mov	pc, lr
+	ret	lr
+
+.popsection
+ENDPROC(__lshrdi3)
 ENDPROC(__aeabi_llsr)
diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S
index 7d9fc0f..00602e9 100644
--- a/arch/arm/lib/memcpy.S
+++ b/arch/arm/lib/memcpy.S
@@ -13,12 +13,6 @@
 #include <linux/linkage.h>
 #include <asm/assembler.h>
 
-#if defined(CONFIG_SYS_THUMB_BUILD) && !defined(MEMCPY_NO_THUMB_BUILD)
-#define W(instr)	instr.w
-#else
-#define W(instr)	instr
-#endif
-
 #define LDR1W_SHIFT	0
 #define STR1W_SHIFT	0
 
diff --git a/arch/arm/lib/muldi3.S b/arch/arm/lib/muldi3.S
new file mode 100644
index 0000000..bc255c5
--- /dev/null
+++ b/arch/arm/lib/muldi3.S
@@ -0,0 +1,48 @@
+/*
+ *  linux/arch/arm/lib/muldi3.S
+ *
+ *  Author:     Nicolas Pitre
+ *  Created:    Oct 19, 2005
+ *  Copyright:  Monta Vista Software, Inc.
+ *
+ *  SPDX-License-Identifier:	GPL-2.0
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+#ifdef __ARMEB__
+#define xh r0
+#define xl r1
+#define yh r2
+#define yl r3
+#else
+#define xl r0
+#define xh r1
+#define yl r2
+#define yh r3
+#endif
+
+ENTRY(__muldi3)
+ENTRY(__aeabi_lmul)
+.pushsection .text.__muldi3, "ax"
+
+	mul	xh, yl, xh
+	mla	xh, xl, yh, xh
+	mov	ip, xl, lsr #16
+	mov	yh, yl, lsr #16
+	bic	xl, xl, ip, lsl #16
+	bic	yl, yl, yh, lsl #16
+	mla	xh, yh, ip, xh
+	mul	yh, xl, yh
+	mul	xl, yl, xl
+	mul	ip, yl, ip
+	adds	xl, xl, yh, lsl #16
+	adc	xh, xh, yh, lsr #16
+	adds	xl, xl, ip, lsl #16
+	adc	xh, xh, ip, lsr #16
+	ret	lr
+
+.popsection
+ENDPROC(__muldi3)
+ENDPROC(__aeabi_lmul)
diff --git a/arch/arm/lib/_uldivmod.S b/arch/arm/lib/uldivmod.S
similarity index 96%
rename from arch/arm/lib/_uldivmod.S
rename to arch/arm/lib/uldivmod.S
index 426c2f2..bbc44c6 100644
--- a/arch/arm/lib/_uldivmod.S
+++ b/arch/arm/lib/uldivmod.S
@@ -9,10 +9,6 @@
 #include <linux/linkage.h>
 #include <asm/assembler.h>
 
-/* We don't use Thumb instructions for now */
-#define ARM(x...)	x
-#define THUMB(x...)
-
 /*
  * A, Q = r0 + (r1 << 32)
  * B, R = r2 + (r3 << 32)
@@ -38,6 +34,8 @@
 )
 
 ENTRY(__aeabi_uldivmod)
+.pushsection .text.__aeabi_uldivmod, "ax"
+
 	stmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) lr}
 	@ Test if B == 0
 	orrs	ip, B_0, B_1		@ Z set -> B == 0
@@ -226,7 +224,9 @@
 	@ Shift A to the right by the appropriate amount.
 	rsb	D_1, D_0, #32
 	mov	Q_0, A_0, lsr D_0
-	orr	Q_0, A_1, lsl D_1
+ ARM(   orr     Q_0, Q_0, A_1, lsl D_1	)
+ THUMB(	lsl	A_1, D_1		)
+ THUMB(	orr	Q_0, A_1		)
 	mov	Q_1, A_1, lsr D_0
 	@ Move C to R
 	mov	R_0, C_0
@@ -242,4 +242,5 @@
 	mov	R_0, #0
 	mov	R_1, #0
 	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
+.popsection
 ENDPROC(__aeabi_uldivmod)
diff --git a/arch/arm/mach-keystone/include/mach/hardware-k2g.h b/arch/arm/mach-keystone/include/mach/hardware-k2g.h
index ca2a119..0f6bf61 100644
--- a/arch/arm/mach-keystone/include/mach/hardware-k2g.h
+++ b/arch/arm/mach-keystone/include/mach/hardware-k2g.h
@@ -74,4 +74,16 @@
 #define K2G_GPIO_DIR_OFFSET		0x0
 #define K2G_GPIO_SETDATA_OFFSET		0x8
 
+/* BOOTCFG RESETMUX8 */
+#define KS2_RSTMUX8			(KS2_DEVICE_STATE_CTRL_BASE + 0x328)
+
+/* RESETMUX register definitions */
+#define RSTMUX_LOCK8_SHIFT		0x0
+#define RSTMUX_LOCK8_MASK		(0x1 << 0)
+#define RSTMUX_OMODE8_SHIFT		0x1
+#define RSTMUX_OMODE8_MASK		(0x7 << 1)
+#define RSTMUX_OMODE8_DEV_RESET		0x2
+#define RSTMUX_OMODE8_INT		0x3
+#define RSTMUX_OMODE8_INT_AND_DEV_RESET	0x4
+
 #endif /* __ASM_ARCH_HARDWARE_K2G_H */
diff --git a/board/ti/am43xx/board.c b/board/ti/am43xx/board.c
index b42546a..bde5ac7 100644
--- a/board/ti/am43xx/board.c
+++ b/board/ti/am43xx/board.c
@@ -678,71 +678,71 @@
 	.index = 1,
 };
 
+int usb_gadget_handle_interrupts(int index)
+{
+	u32 status;
+
+	status = dwc3_omap_uboot_interrupt_status(index);
+	if (status)
+		dwc3_uboot_handle_interrupt(index);
+
+	return 0;
+}
+#endif /* CONFIG_USB_DWC3 */
+
+#if defined(CONFIG_USB_DWC3) || defined(CONFIG_USB_XHCI_OMAP)
 int board_usb_init(int index, enum usb_init_type init)
 {
 	enable_usb_clocks(index);
+#ifdef CONFIG_USB_DWC3
 	switch (index) {
 	case 0:
 		if (init == USB_INIT_DEVICE) {
 			usb_otg_ss1.dr_mode = USB_DR_MODE_PERIPHERAL;
 			usb_otg_ss1_glue.vbus_id_status = OMAP_DWC3_VBUS_VALID;
-		} else {
-			usb_otg_ss1.dr_mode = USB_DR_MODE_HOST;
-			usb_otg_ss1_glue.vbus_id_status = OMAP_DWC3_ID_GROUND;
+			dwc3_omap_uboot_init(&usb_otg_ss1_glue);
+			ti_usb_phy_uboot_init(&usb_phy1_device);
+			dwc3_uboot_init(&usb_otg_ss1);
 		}
-
-		dwc3_omap_uboot_init(&usb_otg_ss1_glue);
-		ti_usb_phy_uboot_init(&usb_phy1_device);
-		dwc3_uboot_init(&usb_otg_ss1);
 		break;
 	case 1:
 		if (init == USB_INIT_DEVICE) {
 			usb_otg_ss2.dr_mode = USB_DR_MODE_PERIPHERAL;
 			usb_otg_ss2_glue.vbus_id_status = OMAP_DWC3_VBUS_VALID;
-		} else {
-			usb_otg_ss2.dr_mode = USB_DR_MODE_HOST;
-			usb_otg_ss2_glue.vbus_id_status = OMAP_DWC3_ID_GROUND;
+			ti_usb_phy_uboot_init(&usb_phy2_device);
+			dwc3_omap_uboot_init(&usb_otg_ss2_glue);
+			dwc3_uboot_init(&usb_otg_ss2);
 		}
-
-		ti_usb_phy_uboot_init(&usb_phy2_device);
-		dwc3_omap_uboot_init(&usb_otg_ss2_glue);
-		dwc3_uboot_init(&usb_otg_ss2);
 		break;
 	default:
 		printf("Invalid Controller Index\n");
 	}
+#endif
 
 	return 0;
 }
 
 int board_usb_cleanup(int index, enum usb_init_type init)
 {
+#ifdef CONFIG_USB_DWC3
 	switch (index) {
 	case 0:
 	case 1:
-		ti_usb_phy_uboot_exit(index);
-		dwc3_uboot_exit(index);
-		dwc3_omap_uboot_exit(index);
+		if (init == USB_INIT_DEVICE) {
+			ti_usb_phy_uboot_exit(index);
+			dwc3_uboot_exit(index);
+			dwc3_omap_uboot_exit(index);
+		}
 		break;
 	default:
 		printf("Invalid Controller Index\n");
 	}
+#endif
 	disable_usb_clocks(index);
 
 	return 0;
 }
-
-int usb_gadget_handle_interrupts(int index)
-{
-	u32 status;
-
-	status = dwc3_omap_uboot_interrupt_status(index);
-	if (status)
-		dwc3_uboot_handle_interrupt(index);
-
-	return 0;
-}
-#endif
+#endif /* defined(CONFIG_USB_DWC3) || defined(CONFIG_USB_XHCI_OMAP) */
 
 #ifdef CONFIG_DRIVER_TI_CPSW
 
diff --git a/board/ti/am57xx/board.c b/board/ti/am57xx/board.c
index 9904047..ccf97b2 100644
--- a/board/ti/am57xx/board.c
+++ b/board/ti/am57xx/board.c
@@ -63,28 +63,28 @@
 }
 
 static const struct emif_regs beagle_x15_emif1_ddr3_532mhz_emif_regs = {
-	.sdram_config_init	= 0x61851b32,
-	.sdram_config		= 0x61851b32,
-	.sdram_config2		= 0x08000000,
-	.ref_ctrl		= 0x000040F1,
-	.ref_ctrl_final		= 0x00001035,
-	.sdram_tim1		= 0xcccf36ab,
-	.sdram_tim2		= 0x308f7fda,
-	.sdram_tim3		= 0x409f88a8,
-	.read_idle_ctrl		= 0x00050000,
-	.zq_config		= 0x5007190b,
-	.temp_alert_config	= 0x00000000,
-	.emif_ddr_phy_ctlr_1_init = 0x0024400b,
-	.emif_ddr_phy_ctlr_1	= 0x0e24400b,
-	.emif_ddr_ext_phy_ctrl_1 = 0x10040100,
-	.emif_ddr_ext_phy_ctrl_2 = 0x00910091,
-	.emif_ddr_ext_phy_ctrl_3 = 0x00950095,
-	.emif_ddr_ext_phy_ctrl_4 = 0x009b009b,
-	.emif_ddr_ext_phy_ctrl_5 = 0x009e009e,
-	.emif_rd_wr_lvl_rmp_win	= 0x00000000,
-	.emif_rd_wr_lvl_rmp_ctl	= 0x80000000,
-	.emif_rd_wr_lvl_ctl	= 0x00000000,
-	.emif_rd_wr_exec_thresh	= 0x00000305
+	.sdram_config_init		= 0x61851b32,
+	.sdram_config			= 0x61851b32,
+	.sdram_config2			= 0x08000000,
+	.ref_ctrl			= 0x000040F1,
+	.ref_ctrl_final			= 0x00001035,
+	.sdram_tim1			= 0xcccf36ab,
+	.sdram_tim2			= 0x308f7fda,
+	.sdram_tim3			= 0x409f88a8,
+	.read_idle_ctrl			= 0x00050000,
+	.zq_config			= 0x5007190b,
+	.temp_alert_config		= 0x00000000,
+	.emif_ddr_phy_ctlr_1_init 	= 0x0024400b,
+	.emif_ddr_phy_ctlr_1		= 0x0e24400b,
+	.emif_ddr_ext_phy_ctrl_1 	= 0x10040100,
+	.emif_ddr_ext_phy_ctrl_2 	= 0x00910091,
+	.emif_ddr_ext_phy_ctrl_3 	= 0x00950095,
+	.emif_ddr_ext_phy_ctrl_4 	= 0x009b009b,
+	.emif_ddr_ext_phy_ctrl_5 	= 0x009e009e,
+	.emif_rd_wr_lvl_rmp_win		= 0x00000000,
+	.emif_rd_wr_lvl_rmp_ctl		= 0x80000000,
+	.emif_rd_wr_lvl_ctl		= 0x00000000,
+	.emif_rd_wr_exec_thresh		= 0x00000305
 };
 
 /* Ext phy ctrl regs 1-35 */
@@ -127,28 +127,28 @@
 };
 
 static const struct emif_regs beagle_x15_emif2_ddr3_532mhz_emif_regs = {
-	.sdram_config_init	= 0x61851b32,
-	.sdram_config		= 0x61851b32,
-	.sdram_config2		= 0x08000000,
-	.ref_ctrl		= 0x000040F1,
-	.ref_ctrl_final		= 0x00001035,
-	.sdram_tim1		= 0xcccf36b3,
-	.sdram_tim2		= 0x308f7fda,
-	.sdram_tim3		= 0x407f88a8,
-	.read_idle_ctrl		= 0x00050000,
-	.zq_config		= 0x5007190b,
-	.temp_alert_config	= 0x00000000,
-	.emif_ddr_phy_ctlr_1_init = 0x0024400b,
-	.emif_ddr_phy_ctlr_1	= 0x0e24400b,
-	.emif_ddr_ext_phy_ctrl_1 = 0x10040100,
-	.emif_ddr_ext_phy_ctrl_2 = 0x00910091,
-	.emif_ddr_ext_phy_ctrl_3 = 0x00950095,
-	.emif_ddr_ext_phy_ctrl_4 = 0x009b009b,
-	.emif_ddr_ext_phy_ctrl_5 = 0x009e009e,
-	.emif_rd_wr_lvl_rmp_win	= 0x00000000,
-	.emif_rd_wr_lvl_rmp_ctl	= 0x80000000,
-	.emif_rd_wr_lvl_ctl	= 0x00000000,
-	.emif_rd_wr_exec_thresh	= 0x00000305
+	.sdram_config_init		= 0x61851b32,
+	.sdram_config			= 0x61851b32,
+	.sdram_config2			= 0x08000000,
+	.ref_ctrl			= 0x000040F1,
+	.ref_ctrl_final			= 0x00001035,
+	.sdram_tim1			= 0xcccf36b3,
+	.sdram_tim2			= 0x308f7fda,
+	.sdram_tim3			= 0x407f88a8,
+	.read_idle_ctrl			= 0x00050000,
+	.zq_config			= 0x5007190b,
+	.temp_alert_config		= 0x00000000,
+	.emif_ddr_phy_ctlr_1_init 	= 0x0024400b,
+	.emif_ddr_phy_ctlr_1		= 0x0e24400b,
+	.emif_ddr_ext_phy_ctrl_1 	= 0x10040100,
+	.emif_ddr_ext_phy_ctrl_2 	= 0x00910091,
+	.emif_ddr_ext_phy_ctrl_3 	= 0x00950095,
+	.emif_ddr_ext_phy_ctrl_4 	= 0x009b009b,
+	.emif_ddr_ext_phy_ctrl_5 	= 0x009e009e,
+	.emif_rd_wr_lvl_rmp_win		= 0x00000000,
+	.emif_rd_wr_lvl_rmp_ctl		= 0x80000000,
+	.emif_rd_wr_lvl_ctl		= 0x00000000,
+	.emif_rd_wr_exec_thresh		= 0x00000305
 };
 
 static const u32 beagle_x15_emif2_ddr3_ext_phy_ctrl_const_regs[] = {
@@ -216,41 +216,77 @@
 }
 
 struct vcores_data beagle_x15_volts = {
-	.mpu.value		= VDD_MPU_DRA752,
-	.mpu.efuse.reg		= STD_FUSE_OPP_VMIN_MPU_NOM,
+	.mpu.value		= VDD_MPU_DRA7,
+	.mpu.efuse.reg		= STD_FUSE_OPP_VMIN_MPU,
 	.mpu.efuse.reg_bits     = DRA752_EFUSE_REGBITS,
 	.mpu.addr		= TPS659038_REG_ADDR_SMPS12,
 	.mpu.pmic		= &tps659038,
-	.mpu.abb_tx_done_mask = OMAP_ABB_MPU_TXDONE_MASK,
+	.mpu.abb_tx_done_mask	= OMAP_ABB_MPU_TXDONE_MASK,
 
-	.eve.value		= VDD_EVE_DRA752,
-	.eve.efuse.reg		= STD_FUSE_OPP_VMIN_DSPEVE_NOM,
+	.eve.value		= VDD_EVE_DRA7,
+	.eve.efuse.reg		= STD_FUSE_OPP_VMIN_DSPEVE,
 	.eve.efuse.reg_bits	= DRA752_EFUSE_REGBITS,
 	.eve.addr		= TPS659038_REG_ADDR_SMPS45,
 	.eve.pmic		= &tps659038,
 	.eve.abb_tx_done_mask	= OMAP_ABB_EVE_TXDONE_MASK,
 
-	.gpu.value		= VDD_GPU_DRA752,
-	.gpu.efuse.reg		= STD_FUSE_OPP_VMIN_GPU_NOM,
+	.gpu.value		= VDD_GPU_DRA7,
+	.gpu.efuse.reg		= STD_FUSE_OPP_VMIN_GPU,
 	.gpu.efuse.reg_bits	= DRA752_EFUSE_REGBITS,
 	.gpu.addr		= TPS659038_REG_ADDR_SMPS45,
 	.gpu.pmic		= &tps659038,
 	.gpu.abb_tx_done_mask	= OMAP_ABB_GPU_TXDONE_MASK,
 
-	.core.value		= VDD_CORE_DRA752,
-	.core.efuse.reg		= STD_FUSE_OPP_VMIN_CORE_NOM,
+	.core.value		= VDD_CORE_DRA7,
+	.core.efuse.reg		= STD_FUSE_OPP_VMIN_CORE,
 	.core.efuse.reg_bits	= DRA752_EFUSE_REGBITS,
 	.core.addr		= TPS659038_REG_ADDR_SMPS6,
 	.core.pmic		= &tps659038,
 
-	.iva.value		= VDD_IVA_DRA752,
-	.iva.efuse.reg		= STD_FUSE_OPP_VMIN_IVA_NOM,
+	.iva.value		= VDD_IVA_DRA7,
+	.iva.efuse.reg		= STD_FUSE_OPP_VMIN_IVA,
 	.iva.efuse.reg_bits	= DRA752_EFUSE_REGBITS,
 	.iva.addr		= TPS659038_REG_ADDR_SMPS45,
 	.iva.pmic		= &tps659038,
 	.iva.abb_tx_done_mask	= OMAP_ABB_IVA_TXDONE_MASK,
 };
 
+struct vcores_data am572x_idk_volts = {
+	.mpu.value		= VDD_MPU_DRA7,
+	.mpu.efuse.reg		= STD_FUSE_OPP_VMIN_MPU,
+	.mpu.efuse.reg_bits     = DRA752_EFUSE_REGBITS,
+	.mpu.addr		= TPS659038_REG_ADDR_SMPS12,
+	.mpu.pmic		= &tps659038,
+	.mpu.abb_tx_done_mask	= OMAP_ABB_MPU_TXDONE_MASK,
+
+	.eve.value		= VDD_EVE_DRA7,
+	.eve.efuse.reg		= STD_FUSE_OPP_VMIN_DSPEVE,
+	.eve.efuse.reg_bits	= DRA752_EFUSE_REGBITS,
+	.eve.addr		= TPS659038_REG_ADDR_SMPS45,
+	.eve.pmic		= &tps659038,
+	.eve.abb_tx_done_mask	= OMAP_ABB_EVE_TXDONE_MASK,
+
+	.gpu.value		= VDD_GPU_DRA7,
+	.gpu.efuse.reg		= STD_FUSE_OPP_VMIN_GPU,
+	.gpu.efuse.reg_bits	= DRA752_EFUSE_REGBITS,
+	.gpu.addr		= TPS659038_REG_ADDR_SMPS6,
+	.gpu.pmic		= &tps659038,
+	.gpu.abb_tx_done_mask	= OMAP_ABB_GPU_TXDONE_MASK,
+
+	.core.value		= VDD_CORE_DRA7,
+	.core.efuse.reg		= STD_FUSE_OPP_VMIN_CORE,
+	.core.efuse.reg_bits	= DRA752_EFUSE_REGBITS,
+	.core.addr		= TPS659038_REG_ADDR_SMPS7,
+	.core.pmic		= &tps659038,
+
+	.iva.value		= VDD_IVA_DRA7,
+	.iva.efuse.reg		= STD_FUSE_OPP_VMIN_IVA,
+	.iva.efuse.reg_bits	= DRA752_EFUSE_REGBITS,
+	.iva.addr		= TPS659038_REG_ADDR_SMPS8,
+	.iva.pmic		= &tps659038,
+	.iva.abb_tx_done_mask	= OMAP_ABB_IVA_TXDONE_MASK,
+};
+
 #ifdef CONFIG_SPL_BUILD
 /* No env to setup for SPL */
 static inline void setup_board_eeprom_env(void) { }
@@ -315,11 +351,18 @@
 
 #endif	/* CONFIG_SPL_BUILD */
 
+void vcores_init(void)
+{
+	if (board_is_am572x_idk())
+		*omap_vcores = &am572x_idk_volts;
+	else
+		*omap_vcores = &beagle_x15_volts;
+}
+
 void hw_data_init(void)
 {
 	*prcm = &dra7xx_prcm;
 	*dplls_data = &dra7xx_dplls;
-	*omap_vcores = &beagle_x15_volts;
 	*ctrl = &dra7xx_ctrl;
 }
 
@@ -439,6 +482,19 @@
 	.index = 1,
 };
 
+int usb_gadget_handle_interrupts(int index)
+{
+	u32 status;
+
+	status = dwc3_omap_uboot_interrupt_status(index);
+	if (status)
+		dwc3_uboot_handle_interrupt(index);
+
+	return 0;
+}
+#endif /* CONFIG_USB_DWC3 */
+
+#if defined(CONFIG_USB_DWC3) || defined(CONFIG_USB_XHCI_OMAP)
 int board_usb_init(int index, enum usb_init_type init)
 {
 	enable_usb_clocks(index);
@@ -448,31 +504,23 @@
 			printf("port %d can't be used as device\n", index);
 			disable_usb_clocks(index);
 			return -EINVAL;
-		} else {
-			usb_otg_ss1.dr_mode = USB_DR_MODE_HOST;
-			usb_otg_ss1_glue.vbus_id_status = OMAP_DWC3_ID_GROUND;
-			setbits_le32((*prcm)->cm_l3init_usb_otg_ss1_clkctrl,
-				     OTG_SS_CLKCTRL_MODULEMODE_HW |
-				     OPTFCLKEN_REFCLK960M);
 		}
-
-		ti_usb_phy_uboot_init(&usb_phy1_device);
-		dwc3_omap_uboot_init(&usb_otg_ss1_glue);
-		dwc3_uboot_init(&usb_otg_ss1);
 		break;
 	case 1:
 		if (init == USB_INIT_DEVICE) {
+#ifdef CONFIG_USB_DWC3
 			usb_otg_ss2.dr_mode = USB_DR_MODE_PERIPHERAL;
 			usb_otg_ss2_glue.vbus_id_status = OMAP_DWC3_VBUS_VALID;
+			ti_usb_phy_uboot_init(&usb_phy2_device);
+			dwc3_omap_uboot_init(&usb_otg_ss2_glue);
+			dwc3_uboot_init(&usb_otg_ss2);
+#endif
 		} else {
 			printf("port %d can't be used as host\n", index);
 			disable_usb_clocks(index);
 			return -EINVAL;
 		}
 
-		ti_usb_phy_uboot_init(&usb_phy2_device);
-		dwc3_omap_uboot_init(&usb_otg_ss2_glue);
-		dwc3_uboot_init(&usb_otg_ss2);
 		break;
 	default:
 		printf("Invalid Controller Index\n");
@@ -483,31 +531,24 @@
 
 int board_usb_cleanup(int index, enum usb_init_type init)
 {
+#ifdef CONFIG_USB_DWC3
 	switch (index) {
 	case 0:
 	case 1:
-		ti_usb_phy_uboot_exit(index);
-		dwc3_uboot_exit(index);
-		dwc3_omap_uboot_exit(index);
+		if (init == USB_INIT_DEVICE) {
+			ti_usb_phy_uboot_exit(index);
+			dwc3_uboot_exit(index);
+			dwc3_omap_uboot_exit(index);
+		}
 		break;
 	default:
 		printf("Invalid Controller Index\n");
 	}
+#endif
 	disable_usb_clocks(index);
 	return 0;
 }
-
-int usb_gadget_handle_interrupts(int index)
-{
-	u32 status;
-
-	status = dwc3_omap_uboot_interrupt_status(index);
-	if (status)
-		dwc3_uboot_handle_interrupt(index);
-
-	return 0;
-}
-#endif
+#endif /* defined(CONFIG_USB_DWC3) || defined(CONFIG_USB_XHCI_OMAP) */
 
 #ifdef CONFIG_DRIVER_TI_CPSW
 
diff --git a/board/ti/ks2_evm/board_k2g.c b/board/ti/ks2_evm/board_k2g.c
index b62c412..8f16845 100644
--- a/board/ti/ks2_evm/board_k2g.c
+++ b/board/ti/ks2_evm/board_k2g.c
@@ -117,12 +117,28 @@
 #endif
 
 #ifdef CONFIG_BOARD_EARLY_INIT_F
+
+static void k2g_reset_mux_config(void)
+{
+	/* Unlock the reset mux register */
+	clrbits_le32(KS2_RSTMUX8, RSTMUX_LOCK8_MASK);
+
+	/* Configure BOOTCFG_RSTMUX8 for WDT event to cause a device reset */
+	clrsetbits_le32(KS2_RSTMUX8, RSTMUX_OMODE8_MASK,
+			RSTMUX_OMODE8_DEV_RESET << RSTMUX_OMODE8_SHIFT);
+
+	/* lock the reset mux register to prevent any spurious writes. */
+	setbits_le32(KS2_RSTMUX8, RSTMUX_LOCK8_MASK);
+}
+
 int board_early_init_f(void)
 {
 	init_plls();
 
 	k2g_mux_config();
 
+	k2g_reset_mux_config();
+
 	/* deassert FLASH_HOLD */
 	clrbits_le32(K2G_GPIO1_BANK2_BASE + K2G_GPIO_DIR_OFFSET,
 		     BIT(9));
diff --git a/common/spl/spl.c b/common/spl/spl.c
index 8b10bdf..c8dfc14 100644
--- a/common/spl/spl.c
+++ b/common/spl/spl.c
@@ -192,6 +192,9 @@
 
 	debug("spl_init()\n");
 #if defined(CONFIG_SYS_MALLOC_F_LEN)
+#ifdef CONFIG_MALLOC_F_ADDR
+	gd->malloc_base = CONFIG_MALLOC_F_ADDR;
+#endif
 	gd->malloc_limit = CONFIG_SYS_MALLOC_F_LEN;
 	gd->malloc_ptr = 0;
 #endif
diff --git a/configs/omap3_logic_defconfig b/configs/omap3_logic_defconfig
index 3226247..e7bf385 100644
--- a/configs/omap3_logic_defconfig
+++ b/configs/omap3_logic_defconfig
@@ -1,6 +1,7 @@
 CONFIG_ARM=y
 CONFIG_OMAP34XX=y
 CONFIG_TARGET_OMAP3_LOGIC=y
+CONFIG_USE_TINY_PRINTF=y
 CONFIG_SPL=y
 CONFIG_FIT=y
 CONFIG_SYS_EXTRA_OPTIONS="NAND"
diff --git a/configs/tplink_wdr4300_defconfig b/configs/tplink_wdr4300_defconfig
index b1af2f6..3eec4c2 100644
--- a/configs/tplink_wdr4300_defconfig
+++ b/configs/tplink_wdr4300_defconfig
@@ -1,6 +1,7 @@
 CONFIG_MIPS=y
 CONFIG_ARCH_ATH79=y
 CONFIG_BOARD_TPLINK_WDR4300=y
+CONFIG_USE_PRIVATE_LIBGCC=y
 CONFIG_SYS_MALLOC_F_LEN=0x2000
 CONFIG_SYS_NS16550=y
 CONFIG_DM_SERIAL=y
diff --git a/drivers/gpio/mxs_gpio.c b/drivers/gpio/mxs_gpio.c
index b54a10b..c25b4c1 100644
--- a/drivers/gpio/mxs_gpio.c
+++ b/drivers/gpio/mxs_gpio.c
@@ -8,7 +8,6 @@
  */
 
 #include <common.h>
-#include <netdev.h>
 #include <asm/errno.h>
 #include <asm/io.h>
 #include <asm/arch/iomux.h>
diff --git a/drivers/serial/ns16550.c b/drivers/serial/ns16550.c
index d7a3cf6..c6cb3eb 100644
--- a/drivers/serial/ns16550.c
+++ b/drivers/serial/ns16550.c
@@ -54,12 +54,6 @@
 #define CONFIG_SYS_NS16550_IER  0x00
 #endif /* CONFIG_SYS_NS16550_IER */
 
-#ifdef CONFIG_DM_SERIAL
-
-#ifndef CONFIG_SYS_NS16550_CLK
-#define CONFIG_SYS_NS16550_CLK  0
-#endif
-
 static inline void serial_out_shift(void *addr, int shift, int value)
 {
 #ifdef CONFIG_SYS_NS16550_PORT_MAPPED
@@ -94,6 +88,12 @@
 #endif
 }
 
+#ifdef CONFIG_DM_SERIAL
+
+#ifndef CONFIG_SYS_NS16550_CLK
+#define CONFIG_SYS_NS16550_CLK  0
+#endif
+
 static void ns16550_writeb(NS16550_t port, int offset, int value)
 {
 	struct ns16550_platdata *plat = port->plat;
@@ -129,27 +129,13 @@
 		(unsigned char *)addr - (unsigned char *)com_port)
 #endif
 
-static inline int calc_divisor(NS16550_t port, int clock, int baudrate)
+int ns16550_calc_divisor(NS16550_t port, int clock, int baudrate)
 {
 	const unsigned int mode_x_div = 16;
 
 	return DIV_ROUND_CLOSEST(clock, mode_x_div * baudrate);
 }
 
-int ns16550_calc_divisor(NS16550_t port, int clock, int baudrate)
-{
-#ifdef CONFIG_OMAP1510
-	/* If can't cleanly clock 115200 set div to 1 */
-	if ((clock == 12000000) && (baudrate == 115200)) {
-		port->osc_12m_sel = OSC_12M_SEL;  /* enable 6.5 * divisor */
-		return 1;			/* return 1 for base divisor */
-	}
-	port->osc_12m_sel = 0;			/* clear if previsouly set */
-#endif
-
-	return calc_divisor(port, clock, baudrate);
-}
-
 static void NS16550_setbrg(NS16550_t com_port, int baud_divisor)
 {
 	serial_out(UART_LCR_BKSE | UART_LCRVAL, &com_port->lcr);
@@ -272,8 +258,8 @@
 	 * feasible. The better fix is to move all users of this driver to
 	 * driver model.
 	 */
-	baud_divisor = calc_divisor(com_port, CONFIG_DEBUG_UART_CLOCK,
-				    CONFIG_BAUDRATE);
+	baud_divisor = ns16550_calc_divisor(com_port, CONFIG_DEBUG_UART_CLOCK,
+					    CONFIG_BAUDRATE);
 	serial_dout(&com_port->ier, CONFIG_SYS_NS16550_IER);
 	serial_dout(&com_port->mcr, UART_MCRVAL);
 	serial_dout(&com_port->fcr, UART_FCRVAL);
diff --git a/drivers/usb/phy/omap_usb_phy.c b/drivers/usb/phy/omap_usb_phy.c
index f9069c7..1993da1 100644
--- a/drivers/usb/phy/omap_usb_phy.c
+++ b/drivers/usb/phy/omap_usb_phy.c
@@ -23,7 +23,7 @@
 #include "../host/xhci.h"
 
 #ifdef CONFIG_OMAP_USB3PHY1_HOST
-struct usb_dpll_params {
+struct usb3_dpll_params {
 	u16	m;
 	u8	n;
 	u8	freq:3;
@@ -31,17 +31,39 @@
 	u32	mf;
 };
 
-#define	NUM_USB_CLKS		6
+struct usb3_dpll_map {
+	unsigned long rate;
+	struct usb3_dpll_params params;
+	struct usb3_dpll_map *dpll_map;
+};
 
-static struct usb_dpll_params omap_usb3_dpll_params[NUM_USB_CLKS] = {
-	{1250, 5, 4, 20, 0},		/* 12 MHz */
-	{3125, 20, 4, 20, 0},		/* 16.8 MHz */
-	{1172, 8, 4, 20, 65537},	/* 19.2 MHz */
-	{1250, 12, 4, 20, 0},		/* 26 MHz */
-	{3125, 47, 4, 20, 92843},	/* 38.4 MHz */
-	{1000, 7, 4, 10, 0},        /* 20 MHz */
+static struct usb3_dpll_map dpll_map_usb[] = {
+	{12000000, {1250, 5, 4, 20, 0} },	/* 12 MHz */
+	{16800000, {3125, 20, 4, 20, 0} },	/* 16.8 MHz */
+	{19200000, {1172, 8, 4, 20, 65537} },	/* 19.2 MHz */
+	{20000000, {1000, 7, 4, 10, 0} },	/* 20 MHz */
+	{26000000, {1250, 12, 4, 20, 0} },	/* 26 MHz */
+	{38400000, {3125, 47, 4, 20, 92843} },	/* 38.4 MHz */
+	{ },					/* Terminator */
 };
 
+static struct usb3_dpll_params *omap_usb3_get_dpll_params(void)
+{
+	unsigned long rate;
+	struct usb3_dpll_map *dpll_map = dpll_map_usb;
+
+	rate = get_sys_clk_freq();
+
+	for (; dpll_map->rate; dpll_map++) {
+		if (rate == dpll_map->rate)
+			return &dpll_map->params;
+	}
+
+	dev_err(phy->dev, "No DPLL configuration for %lu Hz SYS CLK\n", rate);
+
+	return NULL;
+}
+
 static void omap_usb_dpll_relock(struct omap_usb3_phy *phy_regs)
 {
 	u32 val;
@@ -56,32 +78,36 @@
 
 static void omap_usb_dpll_lock(struct omap_usb3_phy *phy_regs)
 {
-	u32 clk_index = get_sys_clk_index();
+	struct usb3_dpll_params	*dpll_params;
 	u32 val;
 
+	dpll_params = omap_usb3_get_dpll_params();
+	if (!dpll_params)
+		return;
+
 	val = readl(&phy_regs->pll_config_1);
 	val &= ~PLL_REGN_MASK;
-	val |= omap_usb3_dpll_params[clk_index].n << PLL_REGN_SHIFT;
+	val |= dpll_params->n << PLL_REGN_SHIFT;
 	writel(val, &phy_regs->pll_config_1);
 
 	val = readl(&phy_regs->pll_config_2);
 	val &= ~PLL_SELFREQDCO_MASK;
-	val |= omap_usb3_dpll_params[clk_index].freq << PLL_SELFREQDCO_SHIFT;
+	val |= dpll_params->freq << PLL_SELFREQDCO_SHIFT;
 	writel(val, &phy_regs->pll_config_2);
 
 	val = readl(&phy_regs->pll_config_1);
 	val &= ~PLL_REGM_MASK;
-	val |= omap_usb3_dpll_params[clk_index].m << PLL_REGM_SHIFT;
+	val |= dpll_params->m << PLL_REGM_SHIFT;
 	writel(val, &phy_regs->pll_config_1);
 
 	val = readl(&phy_regs->pll_config_4);
 	val &= ~PLL_REGM_F_MASK;
-	val |= omap_usb3_dpll_params[clk_index].mf << PLL_REGM_F_SHIFT;
+	val |= dpll_params->mf << PLL_REGM_F_SHIFT;
 	writel(val, &phy_regs->pll_config_4);
 
 	val = readl(&phy_regs->pll_config_3);
 	val &= ~PLL_SD_MASK;
-	val |= omap_usb3_dpll_params[clk_index].sd << PLL_SD_SHIFT;
+	val |= dpll_params->sd << PLL_SD_SHIFT;
 	writel(val, &phy_regs->pll_config_3);
 
 	omap_usb_dpll_relock(phy_regs);
diff --git a/include/configs/tplink_wdr4300.h b/include/configs/tplink_wdr4300.h
index 6273711..abe1da2 100644
--- a/include/configs/tplink_wdr4300.h
+++ b/include/configs/tplink_wdr4300.h
@@ -78,8 +78,6 @@
 #define CONFIG_SYS_MEMTEST_END		0x83f00000
 #define CONFIG_CMD_MEMTEST
 
-#define CONFIG_USE_PRIVATE_LIBGCC
-
 #define CONFIG_CMD_MII
 #define CONFIG_PHY_GIGE
 
diff --git a/lib/Kconfig b/lib/Kconfig
index 2b97c2b..02ca405 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -14,6 +14,7 @@
 config USE_PRIVATE_LIBGCC
 	bool "Use private libgcc"
 	depends on HAVE_PRIVATE_LIBGCC
+	default y if HAVE_PRIVATE_LIBGCC && ((ARM && !ARM64) || MIPS)
 	help
 	  This option allows you to use the built-in libgcc implementation
 	  of U-Boot instead of the one provided by the compiler.
diff --git a/lib/tiny-printf.c b/lib/tiny-printf.c
index 4b70263..5ea2555 100644
--- a/lib/tiny-printf.c
+++ b/lib/tiny-printf.c
@@ -147,8 +147,7 @@
 	*outstr++ = ch;
 }
 
-/* Note that size is ignored */
-int snprintf(char *buf, size_t size, const char *fmt, ...)
+int sprintf(char *buf, const char *fmt, ...)
 {
 	va_list va;
 	int ret;
@@ -161,3 +160,16 @@
 
 	return ret;
 }
+
+/* Note that size is ignored */
+int snprintf(char *buf, size_t size, const char *fmt, ...)
+{
+	va_list va;
+	int ret;
+
+	va_start(va, fmt);
+	ret = sprintf(buf, fmt, va);
+	va_end(va);
+
+	return ret;
+}