arm64: issue ISB after updating system registers

The ARM Architecture Reference Manual clearly states that the PE
pipeline should be flushed after changes to some system registers.
Refer to paragraph "B2.3.5 Memory Barriers" on page B2-92 of the "Arm
Architecture Reference Manual ARMv8 for ARMv8-A Architecture Profile"
(ARM DDI 0487B.a).

Failing to issue an instruction synchronization barrier can lead to
spurious errors, such as a synchronous exception when accessing FPU
registers. This is very prominent on CPUs with a long instruction
pipeline, like the ARM Cortex A72.

This change fixes the following U-Boot panic:

 "Synchronous Abort" handler, esr 0x1fe00000
 elr: 00000000800948cc lr : 0000000080091e04
 x0 : 00000000801ffdc8 x1 : 00000000000000c8
 x2 : 00000000800979d4 x3 : 00000000801ffc60
 x4 : 00000000801ffd40 x5 : ffffff80ffffffd8
 x6 : 00000000801ffd70 x7 : 00000000801ffd70
 x8 : 000000000000000a x9 : 0000000000000000
 x10: 0000000000000044 x11: 0000000000000000
 x12: 0000000000000000 x13: 0000000000000000
 x14: 0000000000000000 x15: 0000000000000000
 x16: 000000008008b2e0 x17: 0000000000000000
 x18: 00000000801ffec0 x19: 00000000800957b0
 x20: 00000000000000c8 x21: 00000000801ffdc8
 x22: 000000008009909e x23: 0000000000000000
 x24: 0000000000000000 x25: 0000000000000000
 x26: 0000000000000000 x27: 0000000000000000
 x28: 0000000000000000 x29: 00000000801ffc50

 Code: a94417e4 a90217e4 a9051fe6 a90617e4 (3d801fe0)

The abort is raised while executing the instruction

 str     q0, [sp, #112]

in the vsnprintf() prologue. This panic has been observed only on
Cortex A72 so far.

This patch places ISBs in other strategic places as well.

Also, this is probably the right fix for the issue worked around in
commit 45f41c134baf ("ARM: uniphier: add weird workaround code for LD20").

Reported-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
Suggested-by: Julien Grall <julien.grall.oss@gmail.com>
Signed-off-by: Volodymyr Babchuk <volodymyr_babchuk@epam.com>
CC: Tom Rini <trini@konsulko.com>
CC: Masahiro Yamada <yamada.masahiro@socionext.com>
CC: Stefano Stabellini <sstabellini@kernel.org>
Reviewed-by: Julien Grall <julien@xen.org>
Reviewed-by: Andre Przywara <andre.przywara@arm.com>
Tested-by: Masahiro Yamada <yamada.masahiro@socionext.com>
diff --git a/arch/arm/cpu/armv8/start.S b/arch/arm/cpu/armv8/start.S
index 99d1266..002698b 100644
--- a/arch/arm/cpu/armv8/start.S
+++ b/arch/arm/cpu/armv8/start.S
@@ -120,6 +120,7 @@
 	mov	x0, #3 << 20
 	msr	cpacr_el1, x0			/* Enable FP/SIMD */
 0:
+	isb
 
 	/*
 	 * Enable SMPEN bit for coherency.
@@ -132,6 +133,7 @@
 	mrs     x0, S3_1_c15_c2_1               /* cpuectlr_el1 */
 	orr     x0, x0, #0x40
 	msr     S3_1_c15_c2_1, x0
+	isb
 1:
 #endif
 
@@ -233,6 +235,7 @@
 	/* Enable data cache clean as data cache clean/invalidate */
 	orr	x0, x0, #1 << 44
 	msr	S3_1_c15_c2_0, x0	/* cpuactlr_el1 */
+	isb
 #endif
 	b 0b
 
@@ -247,6 +250,7 @@
 	/* Disable write streaming no-allocate threshold */
 	orr	x0, x0, #3 << 27
 	msr	S3_1_c15_c2_0, x0	/* cpuactlr_el1 */
+	isb
 #endif
 
 #ifdef CONFIG_ARM_ERRATA_826974
@@ -254,6 +258,7 @@
 	/* Disable speculative load execution ahead of a DMB */
 	orr	x0, x0, #1 << 59
 	msr	S3_1_c15_c2_0, x0	/* cpuactlr_el1 */
+	isb
 #endif
 
 #ifdef CONFIG_ARM_ERRATA_833471
@@ -263,6 +268,7 @@
 	    could impact performance. */
 	orr	x0, x0, #1 << 38
 	msr	S3_1_c15_c2_0, x0	/* cpuactlr_el1 */
+	isb
 #endif
 
 #ifdef CONFIG_ARM_ERRATA_829520
@@ -273,6 +279,7 @@
 	    could impact performance. */
 	orr	x0, x0, #1 << 4
 	msr	S3_1_c15_c2_0, x0	/* cpuactlr_el1 */
+	isb
 #endif
 
 #ifdef CONFIG_ARM_ERRATA_833069
@@ -280,6 +287,7 @@
 	/* Disable Enable Invalidates of BTB bit */
 	and	x0, x0, #0xE
 	msr	S3_1_c15_c2_0, x0	/* cpuactlr_el1 */
+	isb
 #endif
 	b 0b
 ENDPROC(apply_core_errata)