arm: ARMv4 assembly compatibility

There is currently a problem that U-Boot can not work on ARMv4
because assembly imlementations of memcpy() and some other functions
use "bx lr" instruction that is not available on ARMv4 ("mov pc, lr"
should be used instead).

A working preprocessor-based solution to this problem is found in
arch/arm/lib/relocate.S. Move it to the "ret" macro in
arch/arm/include/asm/assembler.h and change all "bx lr" code
to "ret lr" in functions that may run on ARMv4. Linux source code
deals with this problem in the same manner.

v1 -> v2:
Comment update. Pointed out by Andre Przywara.

Signed-off-by: Sergei Antonov <saproj@gmail.com>
CC: Samuel Holland <samuel@sholland.org>
CC: Ye Li <ye.li@nxp.com>
CC: Simon Glass <sjg@chromium.org>
CC: Andre Przywara <andre.przywara@arm.com>
CC: Marek Vasut <marex@denx.de>
CC: Sean Anderson <sean.anderson@seco.com>
CC: Tom Rini <trini@konsulko.com>
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index b146918..8d42ef4 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -58,16 +58,22 @@
 #endif
 
 /*
- * We only support cores that support at least Thumb-1 and thus we use
- * 'bx lr'
+ * Use 'bx lr' everywhere except ARMv4 (without 'T') where only 'mov pc, lr'
+ * works
  */
 	.irp	c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo
 	.macro	ret\c, reg
+
+	/* ARMv4- don't know bx lr but the assembler fails to see that */
+#ifdef __ARM_ARCH_4__
+	mov\c	pc, \reg
+#else
 	.ifeqs	"\reg", "lr"
 	bx\c	\reg
 	.else
 	mov\c	pc, \reg
 	.endif
+#endif
 	.endm
 	.endr