arm: support Thumb-1 with CONFIG_SYS_THUMB_BUILD

When building a Thumb-1-only target with CONFIG_SYS_THUMB_BUILD,
some files fail to build, most of the time because they include
mcr instructions, which only exist for Thumb-2.

This patch introduces a Kconfig option CONFIG_THUMB2 and uses
it to select between Thumb-2 and ARM mode for the aforementioned
files.

Signed-off-by: Albert ARIBAUD <albert.u.boot@aribaud.net>
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 0d756cb..5ab0254 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -10,6 +10,9 @@
 config HAS_VBAR
         bool
 
+config HAS_THUMB2
+        bool
+
 config CPU_ARM720T
         bool
 
@@ -32,9 +35,11 @@
 config CPU_V7
         bool
         select HAS_VBAR
+        select HAS_THUMB2
 
 config CPU_V7M
 	bool
+        select HAS_THUMB2
 
 config CPU_PXA
         bool
diff --git a/arch/arm/cpu/arm926ejs/Makefile b/arch/arm/cpu/arm926ejs/Makefile
index 63fa159..fe78922 100644
--- a/arch/arm/cpu/arm926ejs/Makefile
+++ b/arch/arm/cpu/arm926ejs/Makefile
@@ -20,3 +20,14 @@
 obj-$(CONFIG_MX27) += mx27/
 obj-$(if $(filter mxs,$(SOC)),y) += mxs/
 obj-$(if $(filter spear,$(SOC)),y) += spear/
+
+# some files can only build in ARM or THUMB2, not THUMB1
+
+ifdef CONFIG_SYS_THUMB_BUILD
+ifndef CONFIG_HAS_THUMB2
+
+CFLAGS_cpu.o := -marm
+CFLAGS_cache.o := -marm
+
+endif
+endif
diff --git a/arch/arm/cpu/arm926ejs/cache.c b/arch/arm/cpu/arm926ejs/cache.c
index e5c1a6a..2839c86 100644
--- a/arch/arm/cpu/arm926ejs/cache.c
+++ b/arch/arm/cpu/arm926ejs/cache.c
@@ -82,4 +82,9 @@
 /*
  * Stub implementations for l2 cache operations
  */
+
 __weak void l2_cache_disable(void) {}
+
+#if defined CONFIG_SYS_THUMB_BUILD
+__weak void invalidate_l2_cache(void) {}
+#endif
diff --git a/arch/arm/include/asm/cache.h b/arch/arm/include/asm/cache.h
index a836e9f..1f63127 100644
--- a/arch/arm/include/asm/cache.h
+++ b/arch/arm/include/asm/cache.h
@@ -16,6 +16,9 @@
 /*
  * Invalidate L2 Cache using co-proc instruction
  */
+#ifdef CONFIG_SYS_THUMB_BUILD
+void invalidate_l2_cache(void);
+#else
 static inline void invalidate_l2_cache(void)
 {
 	unsigned int val=0;
@@ -24,6 +27,7 @@
 		: : "r" (val) : "cc");
 	isb();
 }
+#endif
 
 void l2_cache_enable(void);
 void l2_cache_disable(void);
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
index 2bdfaba..f3db7b5 100644
--- a/arch/arm/lib/Makefile
+++ b/arch/arm/lib/Makefile
@@ -60,3 +60,27 @@
 ifneq (,$(findstring -mabi=aapcs-linux,$(PLATFORM_CPPFLAGS)))
 extra-y	+= eabi_compat.o
 endif
+
+# some files can only build in ARM or THUMB2, not THUMB1
+
+ifdef CONFIG_SYS_THUMB_BUILD
+ifndef CONFIG_HAS_THUMB2
+
+# for C files, just apend -marm, which will override previous -mthumb*
+
+CFLAGS_cache.o := -marm
+CFLAGS_cache-cp15.o := -marm
+
+# For .S, drop -mthumb* and other thumb-related options.
+# CFLAGS_REMOVE_* would not have an effet, so AFLAGS_REMOVE_*
+# was implemented and is used here.
+# Also, define ${target}_NO_THUMB_BUILD for these two targets
+# so that the code knows it should not use Thumb.
+
+AFLAGS_REMOVE_memset.o := -mthumb -mthumb-interwork
+AFLAGS_REMOVE_memcpy.o := -mthumb -mthumb-interwork
+AFLAGS_memset.o := -DMEMSET_NO_THUMB_BUILD
+AFLAGS_memcpy.o := -DMEMCPY_NO_THUMB_BUILD
+
+endif
+endif
diff --git a/arch/arm/lib/cache.c b/arch/arm/lib/cache.c
index cd13db3..3bd8710 100644
--- a/arch/arm/lib/cache.c
+++ b/arch/arm/lib/cache.c
@@ -88,3 +88,14 @@
 	return next;
 }
 #endif /* CONFIG_SYS_NONCACHED_MEMORY */
+
+#if defined(CONFIG_SYS_THUMB_BUILD)
+void invalidate_l2_cache(void)
+{
+	unsigned int val = 0;
+
+	asm volatile("mcr p15, 1, %0, c15, c11, 0 @ invl l2 cache"
+		: : "r" (val) : "cc");
+	isb();
+}
+#endif
diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S
index eeaf003..7d9fc0f 100644
--- a/arch/arm/lib/memcpy.S
+++ b/arch/arm/lib/memcpy.S
@@ -13,7 +13,7 @@
 #include <linux/linkage.h>
 #include <asm/assembler.h>
 
-#ifdef CONFIG_SYS_THUMB_BUILD
+#if defined(CONFIG_SYS_THUMB_BUILD) && !defined(MEMCPY_NO_THUMB_BUILD)
 #define W(instr)	instr.w
 #else
 #define W(instr)	instr
@@ -62,7 +62,7 @@
 
 /* Prototype: void *memcpy(void *dest, const void *src, size_t n); */
 	.syntax unified
-#ifdef CONFIG_SYS_THUMB_BUILD
+#if defined(CONFIG_SYS_THUMB_BUILD) && !defined(MEMCPY_NO_THUMB_BUILD)
 	.thumb
 	.thumb_func
 #endif
diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S
index 7208f20..df053a3 100644
--- a/arch/arm/lib/memset.S
+++ b/arch/arm/lib/memset.S
@@ -16,7 +16,7 @@
 	.align	5
 
 	.syntax unified
-#ifdef CONFIG_SYS_THUMB_BUILD
+#if defined(CONFIG_SYS_THUMB_BUILD) && !defined(MEMSET_NO_THUMB_BUILD)
 	.thumb
 	.thumb_func
 #endif
diff --git a/arch/arm/mach-orion5x/Makefile b/arch/arm/mach-orion5x/Makefile
index 546ebcb..33dcad4 100644
--- a/arch/arm/mach-orion5x/Makefile
+++ b/arch/arm/mach-orion5x/Makefile
@@ -16,3 +16,13 @@
 ifndef CONFIG_SKIP_LOWLEVEL_INIT
 obj-y	+= lowlevel_init.o
 endif
+
+# some files can only build in ARM or THUMB2, not THUMB1
+
+ifdef CONFIG_SYS_THUMB_BUILD
+ifndef CONFIG_HAS_THUMB2
+
+CFLAGS_cpu.o := -marm
+
+endif
+endif
diff --git a/arch/arm/thumb1/include/asm/proc-armv/system.h b/arch/arm/thumb1/include/asm/proc-armv/system.h
new file mode 100644
index 0000000..7dfbf3d
--- /dev/null
+++ b/arch/arm/thumb1/include/asm/proc-armv/system.h
@@ -0,0 +1,69 @@
+/*
+ *  Thumb-1 drop-in for the linux/include/asm-arm/proc-armv/system.h
+ *
+ *  (C) Copyright 2015
+ *  Albert ARIBAUD <albert.u.boot@aribaud.net>
+ *
+ * The original file does not build in Thumb mode. However, in U-Boot
+ * we don't use interrupt context, so we can redefine these as empty
+ * memory barriers, which makes Thumb-1 compiler happy.
+ *
+ *  SPDX-License-Identifier:	GPL-2.0+
+ */
+
+/*
+ * Use the same macro name as linux/include/asm-arm/proc-armv/system.h
+ * here, so that if the original ever gets included after us, it won't
+ * try to re-redefine anything.
+ */
+
+#ifndef __ASM_PROC_SYSTEM_H
+#define __ASM_PROC_SYSTEM_H
+
+/*
+ * Redefine all original macros with static inline functions containing
+ * a simple memory barrier, so that they produce the same instruction
+ * ordering constraints as their original counterparts.
+ * We use static inline functions rather than macros so that we can tell
+ * the compiler to not complain about unused arguments.
+ */
+
+static inline void local_irq_save(
+	unsigned long flags __attribute__((unused)))
+{
+	__asm__ __volatile__ ("" : : : "memory");
+}
+
+static inline void local_irq_enable(void)
+{
+	__asm__ __volatile__ ("" : : : "memory");
+}
+
+static inline void local_irq_disable(void)
+{
+	__asm__ __volatile__ ("" : : : "memory");
+}
+
+static inline void __stf(void)
+{
+	__asm__ __volatile__ ("" : : : "memory");
+}
+
+static inline void __clf(void)
+{
+	__asm__ __volatile__ ("" : : : "memory");
+}
+
+static inline void local_save_flags(
+	unsigned long flags __attribute__((unused)))
+{
+	__asm__ __volatile__ ("" : : : "memory");
+}
+
+static inline void local_irq_restore(
+	unsigned long flags __attribute__((unused)))
+{
+	__asm__ __volatile__ ("" : : : "memory");
+}
+
+#endif /*  __ASM_PROC_SYSTEM_H */