Correct usage of data and instruction barriers

The current code does not always use data and instruction
barriers as required by the architecture and frequently uses
barriers excessively due to their inclusion in all of the
write_*() helper functions.

To enable review of the correctness of the code, barriers
should be used explicitly in the assembler or C code that
modifies processor state requiring them.

This patch removes the barriers from the helper functions and
introduces them as necessary elsewhere in the code.

PORTING NOTE: check any port of Trusted Firmware for code that
uses the system register, cache maintenance or TLB maintenance
helper functions and relies on their previous barrier
behaviour, and add explicit barriers as necessary.

Fixes ARM-software/tf-issues#92

Change-Id: Ie63e187404ff10e0bdcb39292dd9066cb84c53bf
diff --git a/bl1/aarch64/bl1_arch_setup.c b/bl1/aarch64/bl1_arch_setup.c
index 758b8e8..a1ebbdb 100644
--- a/bl1/aarch64/bl1_arch_setup.c
+++ b/bl1/aarch64/bl1_arch_setup.c
@@ -44,6 +44,7 @@
 	tmp_reg |= (SCTLR_A_BIT | SCTLR_SA_BIT);
 	tmp_reg &= ~SCTLR_EE_BIT;
 	write_sctlr_el3(tmp_reg);
+	isb();
 
 	/*
 	 * Enable HVCs, route FIQs to EL3, set the next EL to be AArch64, route
diff --git a/bl1/aarch64/bl1_entrypoint.S b/bl1/aarch64/bl1_entrypoint.S
index 012b779..e25386f 100644
--- a/bl1/aarch64/bl1_entrypoint.S
+++ b/bl1/aarch64/bl1_entrypoint.S
@@ -86,7 +86,6 @@
 	mrs	x0, sctlr_el3
 	orr	x0, x0, #SCTLR_I_BIT
 	msr	sctlr_el3, x0
-
 	isb
 
 _wait_for_entrypoint:
diff --git a/bl1/aarch64/bl1_exceptions.S b/bl1/aarch64/bl1_exceptions.S
index 68d088b..7f930d8 100644
--- a/bl1/aarch64/bl1_exceptions.S
+++ b/bl1/aarch64/bl1_exceptions.S
@@ -221,6 +221,7 @@
 	bl	read_sctlr_el3
 	bic	x0, x0, x1
 	bl	write_sctlr_el3
+	isb
 	mov	x0, #DCCISW
 	bl	dcsw_op_all
 	bl	tlbialle3
diff --git a/bl2/aarch64/bl2_entrypoint.S b/bl2/aarch64/bl2_entrypoint.S
index b8af9a5..cd07aa9 100644
--- a/bl2/aarch64/bl2_entrypoint.S
+++ b/bl2/aarch64/bl2_entrypoint.S
@@ -73,7 +73,6 @@
 	mrs	x0, sctlr_el1
 	orr	x0, x0, #SCTLR_I_BIT
 	msr	sctlr_el1, x0
-
 	isb
 
 	/* ---------------------------------------------
diff --git a/bl31/aarch64/bl31_entrypoint.S b/bl31/aarch64/bl31_entrypoint.S
index 39fa605..1b79421 100644
--- a/bl31/aarch64/bl31_entrypoint.S
+++ b/bl31/aarch64/bl31_entrypoint.S
@@ -89,7 +89,6 @@
 	mrs	x1, sctlr_el3
 	orr	x1, x1, #SCTLR_I_BIT
 	msr	sctlr_el3, x1
-
 	isb
 
 	/* ---------------------------------------------
diff --git a/bl31/bl31_main.c b/bl31/bl31_main.c
index 01f00f2..755320d 100644
--- a/bl31/bl31_main.c
+++ b/bl31/bl31_main.c
@@ -100,6 +100,7 @@
 	assert(cm_get_context(mpidr, NON_SECURE));
 	cm_set_next_eret_context(NON_SECURE);
 	write_vbar_el3((uint64_t) runtime_exceptions);
+	isb();
 	next_image_type = NON_SECURE;
 
 	/*
diff --git a/drivers/arm/gic/aarch64/gic_v3_sysregs.S b/drivers/arm/gic/aarch64/gic_v3_sysregs.S
index 2a96da7..ddf85a8 100644
--- a/drivers/arm/gic/aarch64/gic_v3_sysregs.S
+++ b/drivers/arm/gic/aarch64/gic_v3_sysregs.S
@@ -67,23 +67,19 @@
 
 func write_icc_sre_el1
 	msr	ICC_SRE_EL1, x0
-	isb
 	ret
 
 
 func write_icc_sre_el2
 	msr	ICC_SRE_EL2, x0
-	isb
 	ret
 
 
 func write_icc_sre_el3
 	msr	ICC_SRE_EL3, x0
-	isb
 	ret
 
 
 func write_icc_pmr_el1
 	msr	ICC_PMR_EL1, x0
-	isb
 	ret
diff --git a/lib/aarch64/cache_helpers.S b/lib/aarch64/cache_helpers.S
index 2649ad0..dc91975 100644
--- a/lib/aarch64/cache_helpers.S
+++ b/lib/aarch64/cache_helpers.S
@@ -46,57 +46,41 @@
 
 func dcisw
 	dc	isw, x0
-	dsb	sy
-	isb
 	ret
 
 
 func dccisw
 	dc	cisw, x0
-	dsb	sy
-	isb
 	ret
 
 
 func dccsw
 	dc	csw, x0
-	dsb	sy
-	isb
 	ret
 
 
 func dccvac
 	dc	cvac, x0
-	dsb	sy
-	isb
 	ret
 
 
 func dcivac
 	dc	ivac, x0
-	dsb	sy
-	isb
 	ret
 
 
 func dccivac
 	dc	civac, x0
-	dsb	sy
-	isb
 	ret
 
 
 func dccvau
 	dc	cvau, x0
-	dsb	sy
-	isb
 	ret
 
 
 func dczva
 	dc	zva, x0
-	dsb	sy
-	isb
 	ret
 
 
diff --git a/lib/aarch64/cpu_helpers.S b/lib/aarch64/cpu_helpers.S
index 573d0b8..4e5eb5b 100644
--- a/lib/aarch64/cpu_helpers.S
+++ b/lib/aarch64/cpu_helpers.S
@@ -52,5 +52,6 @@
 	bl	read_cpuectlr
 	orr	x0, x0, #CPUECTLR_SMP_BIT
 	bl	write_cpuectlr
+	isb
 smp_setup_end:
 	ret	x19
diff --git a/lib/aarch64/misc_helpers.S b/lib/aarch64/misc_helpers.S
index e7b2331..c33ade2 100644
--- a/lib/aarch64/misc_helpers.S
+++ b/lib/aarch64/misc_helpers.S
@@ -187,19 +187,16 @@
 
 func write_spsr_el1
 	msr	spsr_el1, x0
-	isb
 	ret
 
 
 func write_spsr_el2
 	msr	spsr_el2, x0
-	isb
 	ret
 
 
 func write_spsr_el3
 	msr	spsr_el3, x0
-	isb
 	ret
 
 
@@ -240,19 +237,16 @@
 
 func write_elr_el1
 	msr	elr_el1, x0
-	isb
 	ret
 
 
 func write_elr_el2
 	msr	elr_el2, x0
-	isb
 	ret
 
 
 func write_elr_el3
 	msr	elr_el3, x0
-	isb
 	ret
 
 
diff --git a/lib/aarch64/sysreg_helpers.S b/lib/aarch64/sysreg_helpers.S
index 61468f9..1d73ba9 100644
--- a/lib/aarch64/sysreg_helpers.S
+++ b/lib/aarch64/sysreg_helpers.S
@@ -201,19 +201,16 @@
 
 func write_vbar_el1
 	msr	vbar_el1, x0
-	isb
 	ret
 
 
 func write_vbar_el2
 	msr	vbar_el2, x0
-	isb
 	ret
 
 
 func write_vbar_el3
 	msr	vbar_el3, x0
-	isb
 	ret
 
 
@@ -238,19 +235,16 @@
 
 func write_afsr0_el1
 	msr	afsr0_el1, x0
-	isb
 	ret
 
 
 func write_afsr0_el2
 	msr	afsr0_el2, x0
-	isb
 	ret
 
 
 func write_afsr0_el3
 	msr	afsr0_el3, x0
-	isb
 	ret
 
 
@@ -275,19 +269,16 @@
 
 func write_far_el1
 	msr	far_el1, x0
-	isb
 	ret
 
 
 func write_far_el2
 	msr	far_el2, x0
-	isb
 	ret
 
 
 func write_far_el3
 	msr	far_el3, x0
-	isb
 	ret
 
 
@@ -312,19 +303,16 @@
 
 func write_mair_el1
 	msr	mair_el1, x0
-	isb
 	ret
 
 
 func write_mair_el2
 	msr	mair_el2, x0
-	isb
 	ret
 
 
 func write_mair_el3
 	msr	mair_el3, x0
-	isb
 	ret
 
 
@@ -349,19 +337,16 @@
 
 func write_amair_el1
 	msr	amair_el1, x0
-	isb
 	ret
 
 
 func write_amair_el2
 	msr	amair_el2, x0
-	isb
 	ret
 
 
 func write_amair_el3
 	msr	amair_el3, x0
-	isb
 	ret
 
 
@@ -405,19 +390,16 @@
 
 func write_rmr_el1
 	msr	rmr_el1, x0
-	isb
 	ret
 
 
 func write_rmr_el2
 	msr	rmr_el2, x0
-	isb
 	ret
 
 
 func write_rmr_el3
 	msr	rmr_el3, x0
-	isb
 	ret
 
 
@@ -442,19 +424,16 @@
 
 func write_afsr1_el1
 	msr	afsr1_el1, x0
-	isb
 	ret
 
 
 func write_afsr1_el2
 	msr	afsr1_el2, x0
-	isb
 	ret
 
 
 func write_afsr1_el3
 	msr	afsr1_el3, x0
-	isb
 	ret
 
 
@@ -479,22 +458,16 @@
 
 func write_sctlr_el1
 	msr	sctlr_el1, x0
-	dsb	sy
-	isb
 	ret
 
 
 func write_sctlr_el2
 	msr	sctlr_el2, x0
-	dsb	sy
-	isb
 	ret
 
 
 func write_sctlr_el3
 	msr	sctlr_el3, x0
-	dsb	sy
-	isb
 	ret
 
 
@@ -519,22 +492,16 @@
 
 func write_actlr_el1
 	msr	actlr_el1, x0
-	dsb	sy
-	isb
 	ret
 
 
 func write_actlr_el2
 	msr	actlr_el2, x0
-	dsb	sy
-	isb
 	ret
 
 
 func write_actlr_el3
 	msr	actlr_el3, x0
-	dsb	sy
-	isb
 	ret
 
 
@@ -559,22 +526,16 @@
 
 func write_esr_el1
 	msr	esr_el1, x0
-	dsb	sy
-	isb
 	ret
 
 
 func write_esr_el2
 	msr	esr_el2, x0
-	dsb	sy
-	isb
 	ret
 
 
 func write_esr_el3
 	msr	esr_el3, x0
-	dsb	sy
-	isb
 	ret
 
 
@@ -599,22 +560,16 @@
 
 func write_tcr_el1
 	msr	tcr_el1, x0
-	dsb	sy
-	isb
 	ret
 
 
 func write_tcr_el2
 	msr	tcr_el2, x0
-	dsb	sy
-	isb
 	ret
 
 
 func write_tcr_el3
 	msr	tcr_el3, x0
-	dsb	sy
-	isb
 	ret
 
 
@@ -643,15 +598,11 @@
 
 func write_cptr_el2
 	msr	cptr_el2, x0
-	dsb	sy
-	isb
 	ret
 
 
 func write_cptr_el3
 	msr	cptr_el3, x0
-	dsb	sy
-	isb
 	ret
 
 
@@ -676,19 +627,16 @@
 
 func write_ttbr0_el1
 	msr	ttbr0_el1, x0
-	isb
 	ret
 
 
 func write_ttbr0_el2
 	msr	ttbr0_el2, x0
-	isb
 	ret
 
 
 func write_ttbr0_el3
 	msr	ttbr0_el3, x0
-	isb
 	ret
 
 
@@ -711,7 +659,6 @@
 
 func write_ttbr1_el1
 	msr	ttbr1_el1, x0
-	isb
 	ret
 
 
@@ -730,8 +677,6 @@
 
 func write_hcr
 	msr	hcr_el2, x0
-	dsb	sy
-	isb
 	ret
 
 
@@ -762,8 +707,6 @@
 
 func write_cpuectlr
 	msr	CPUECTLR_EL1, x0
-	dsb	sy
-	isb
 	ret
 
 
@@ -789,8 +732,6 @@
 
 func write_scr
 	msr	scr_el3, x0
-	dsb	sy
-	isb
 	ret
 
 
@@ -818,6 +759,7 @@
 	mov	x1, #AARCH64_CPTR_TFP
 	bic	x0, x0, x1
 	msr	cptr_el3, x0
+	isb
 	ret
 
 
diff --git a/lib/aarch64/tlb_helpers.S b/lib/aarch64/tlb_helpers.S
index ec1558b..8dfae12 100644
--- a/lib/aarch64/tlb_helpers.S
+++ b/lib/aarch64/tlb_helpers.S
@@ -41,47 +41,33 @@
 
 func tlbialle1
 	tlbi	alle1
-	dsb	sy
-	isb
 	ret
 
 
 func tlbialle1is
 	tlbi	alle1is
-	dsb	sy
-	isb
 	ret
 
 
 func tlbialle2
 	tlbi	alle2
-	dsb	sy
-	isb
 	ret
 
 
 func tlbialle2is
 	tlbi	alle2is
-	dsb	sy
-	isb
 	ret
 
 
 func tlbialle3
 	tlbi	alle3
-	dsb	sy
-	isb
 	ret
 
 
 func tlbialle3is
 	tlbi	alle3is
-	dsb	sy
-	isb
 	ret
 
 func tlbivmalle1
 	tlbi	vmalle1
-	dsb	sy
-	isb
 	ret
diff --git a/plat/fvp/aarch64/plat_common.c b/plat/fvp/aarch64/plat_common.c
index c8e529d..e2f2343 100644
--- a/plat/fvp/aarch64/plat_common.c
+++ b/plat/fvp/aarch64/plat_common.c
@@ -69,6 +69,8 @@
 	ttbr = (unsigned long) l1_xlation_table;
 
 	if (GET_EL(current_el) == MODE_EL3) {
+		assert((read_sctlr_el3() & SCTLR_M_BIT) == 0);
+
 		write_mair_el3(mair);
 		tcr |= TCR_EL3_RES1;
 		/* Invalidate EL3 TLBs */
@@ -77,11 +79,19 @@
 		write_tcr_el3(tcr);
 		write_ttbr0_el3(ttbr);
 
+		/* ensure all translation table writes have drained into memory,
+		 * the TLB invalidation is complete, and translation register
+		 * writes are committed before enabling the MMU
+		 */
+		dsb();
+		isb();
+
 		sctlr = read_sctlr_el3();
 		sctlr |= SCTLR_WXN_BIT | SCTLR_M_BIT | SCTLR_I_BIT;
 		sctlr |= SCTLR_A_BIT | SCTLR_C_BIT;
 		write_sctlr_el3(sctlr);
 	} else {
+		assert((read_sctlr_el1() & SCTLR_M_BIT) == 0);
 
 		write_mair_el1(mair);
 		/* Invalidate EL1 TLBs */
@@ -90,11 +100,20 @@
 		write_tcr_el1(tcr);
 		write_ttbr0_el1(ttbr);
 
+		/* ensure all translation table writes have drained into memory,
+		 * the TLB invalidation is complete, and translation register
+		 * writes are committed before enabling the MMU
+		 */
+		dsb();
+		isb();
+
 		sctlr = read_sctlr_el1();
 		sctlr |= SCTLR_WXN_BIT | SCTLR_M_BIT | SCTLR_I_BIT;
 		sctlr |= SCTLR_A_BIT | SCTLR_C_BIT;
 		write_sctlr_el1(sctlr);
 	}
+	/* ensure the MMU enable takes effect immediately */
+	isb();
 
 	return;
 }
@@ -113,6 +132,8 @@
 		sctlr = sctlr & ~(SCTLR_M_BIT | SCTLR_C_BIT);
 		write_sctlr_el1(sctlr);
 	}
+	/* ensure the MMU disable takes effect immediately */
+	isb();
 
 	/* Flush the caches */
 	dcsw_op_all(DCCISW);
diff --git a/plat/fvp/plat_gic.c b/plat/fvp/plat_gic.c
index 8457af1..db3c9cf 100644
--- a/plat/fvp/plat_gic.c
+++ b/plat/fvp/plat_gic.c
@@ -86,6 +86,7 @@
 	 */
 	scr_val = read_scr();
 	write_scr(scr_val | SCR_NS_BIT);
+	isb();	/* ensure NS=1 takes effect before accessing ICC_SRE_EL2 */
 
 	/*
 	 * By default EL2 and NS-EL1 software should be able to enable GICv3
@@ -103,9 +104,11 @@
 	write_icc_sre_el2(val | ICC_SRE_EN | ICC_SRE_SRE);
 
 	write_icc_pmr_el1(GIC_PRI_MASK);
+	isb();	/* commit ICC_* changes before setting NS=0 */
 
 	/* Restore SCR_EL3 */
 	write_scr(scr_val);
+	isb();	/* ensure NS=0 takes effect immediately */
 }
 
 /*******************************************************************************
diff --git a/plat/fvp/plat_pm.c b/plat/fvp/plat_pm.c
index 5430fff..f80e2d7 100644
--- a/plat/fvp/plat_pm.c
+++ b/plat/fvp/plat_pm.c
@@ -54,7 +54,11 @@
 	if (target_afflvl != MPIDR_AFFLVL0)
 		return PSCI_E_INVALID_PARAMS;
 
-	/* Enter standby state */
+	/*
+	 * Enter standby state
+	 * dsb is good practice before using wfi to enter low power states
+	 */
+	dsb();
 	wfi();
 
 	return PSCI_E_SUCCESS;
diff --git a/services/std_svc/psci/psci_afflvl_off.c b/services/std_svc/psci/psci_afflvl_off.c
index e007bc3..21a4d1a 100644
--- a/services/std_svc/psci/psci_afflvl_off.c
+++ b/services/std_svc/psci/psci_afflvl_off.c
@@ -82,6 +82,7 @@
 	sctlr = read_sctlr_el3();
 	sctlr &= ~SCTLR_C_BIT;
 	write_sctlr_el3(sctlr);
+	isb();	/* ensure data cache disable takes immediate effect */
 
 	/*
 	 * CAUTION: This flush to the level of unification makes an assumption
diff --git a/services/std_svc/psci/psci_afflvl_suspend.c b/services/std_svc/psci/psci_afflvl_suspend.c
index dc12f7a..534e4a9 100644
--- a/services/std_svc/psci/psci_afflvl_suspend.c
+++ b/services/std_svc/psci/psci_afflvl_suspend.c
@@ -198,6 +198,7 @@
 	sctlr = read_sctlr_el3();
 	sctlr &= ~SCTLR_C_BIT;
 	write_sctlr_el3(sctlr);
+	isb();	/* ensure data cache disable takes immediate effect */
 
 	/*
 	 * CAUTION: This flush to the level of unification makes an assumption
diff --git a/services/std_svc/psci/psci_entry.S b/services/std_svc/psci/psci_entry.S
index e2c690d..ec55a81 100644
--- a/services/std_svc/psci/psci_entry.S
+++ b/services/std_svc/psci/psci_entry.S
@@ -75,7 +75,6 @@
 	 * ---------------------------------------------
 	 */
 	msr	spsel, #0
-	isb
 
 	bl	read_mpidr
 	mov	x19, x0
@@ -158,7 +157,7 @@
 	ret
 
 func final_wfi
-	dsb	sy
+	dsb	sy		// ensure write buffer empty
 	wfi
 wfi_spill:
 	b	wfi_spill