xlat v2: Flush xlat tables after being modified

During cold boot, the initial translation tables are created with data
caches disabled, so all modifications go to memory directly. After the
MMU is enabled and data cache is enabled, any modification to the tables
goes to data cache, and eventually may get flushed to memory.

If CPU0 modifies the tables while CPU1 is off, CPU0 will have the
modified tables in its data cache. When CPU1 is powered on, the MMU is
enabled, then it enables coherency, and then it enables the data cache.
Until this is done, CPU1 isn't in coherency, and the translation tables
it sees can be outdated if CPU0 still has some modified entries in its
data cache.

This can be a problem in some cases. For example, the warm boot code
uses only the tables mapped during cold boot, which don't normally
change. However, if they are modified (and a RO page is made RW, or a XN
page is made executable) the CPU will see the old attributes and crash
when it tries to access it.

This doesn't happen in systems with HW_ASSISTED_COHERENCY or
WARMBOOT_ENABLE_DCACHE_EARLY. In these systems, the data cache is
enabled at the same time as the MMU. As soon as this happens, the CPU is
in coherency.

There was an attempt of a fix in psci_helpers.S, but it didn't solve the
problem. That code has been deleted. The code was introduced in commit
<264410306381> ("Invalidate TLB entries during warm boot").

Now, during a map or unmap operation, the memory associated to each
modified table is flushed. Traversing a table will also flush it's
memory, as there is no way to tell in the current implementation if the
table that has been traversed has also been modified.

Change-Id: I4b520bca27502f1018878061bc5fb82af740bb92
Signed-off-by: Antonio Nino Diaz <antonio.ninodiaz@arm.com>
diff --git a/lib/psci/aarch32/psci_helpers.S b/lib/psci/aarch32/psci_helpers.S
index a29a29c..63d7e70 100644
--- a/lib/psci/aarch32/psci_helpers.S
+++ b/lib/psci/aarch32/psci_helpers.S
@@ -91,28 +91,6 @@
 	stcopr	r0, SCTLR
 	isb
 
-#if PLAT_XLAT_TABLES_DYNAMIC
-	/* ---------------------------------------------
-	 * During warm boot the MMU is enabled with data
-	 * cache disabled, then the interconnect is set
-	 * up and finally the data cache is enabled.
-	 *
-	 * During this period, if another CPU modifies
-	 * the translation tables, the MMU table walker
-	 * may read the old entries. This is only a
-	 * problem for dynamic regions, the warm boot
-	 * code isn't affected because it is static.
-	 *
-	 * Invalidate all TLB entries loaded while the
-	 * CPU wasn't coherent with the rest of the
-	 * system.
-	 * ---------------------------------------------
-	 */
-	stcopr	r0, TLBIALL
-	dsb	ish
-	isb
-#endif
-
 	pop	{r12, pc}
 endfunc psci_do_pwrup_cache_maintenance
 
diff --git a/lib/psci/aarch64/psci_helpers.S b/lib/psci/aarch64/psci_helpers.S
index d37ca76..06d6636 100644
--- a/lib/psci/aarch64/psci_helpers.S
+++ b/lib/psci/aarch64/psci_helpers.S
@@ -115,28 +115,6 @@
 	msr	sctlr_el3, x0
 	isb
 
-#if PLAT_XLAT_TABLES_DYNAMIC
-	/* ---------------------------------------------
-	 * During warm boot the MMU is enabled with data
-	 * cache disabled, then the interconnect is set
-	 * up and finally the data cache is enabled.
-	 *
-	 * During this period, if another CPU modifies
-	 * the translation tables, the MMU table walker
-	 * may read the old entries. This is only a
-	 * problem for dynamic regions, the warm boot
-	 * code isn't affected because it is static.
-	 *
-	 * Invalidate all TLB entries loaded while the
-	 * CPU wasn't coherent with the rest of the
-	 * system.
-	 * ---------------------------------------------
-	 */
-	tlbi	alle3
-	dsb	ish
-	isb
-#endif
-
 	ldp	x29, x30, [sp], #16
 	ret
 endfunc psci_do_pwrup_cache_maintenance
diff --git a/lib/xlat_tables_v2/aarch32/xlat_tables_arch.c b/lib/xlat_tables_v2/aarch32/xlat_tables_arch.c
index c09fb59..2482840 100644
--- a/lib/xlat_tables_v2/aarch32/xlat_tables_arch.c
+++ b/lib/xlat_tables_v2/aarch32/xlat_tables_arch.c
@@ -48,6 +48,11 @@
 	return (read_sctlr() & SCTLR_M_BIT) != 0;
 }
 
+bool is_dcache_enabled(void)
+{
+	return (read_sctlr() & SCTLR_C_BIT) != 0;
+}
+
 uint64_t xlat_arch_regime_get_xn_desc(int xlat_regime __unused)
 {
 	return UPPER_ATTRS(XN);
diff --git a/lib/xlat_tables_v2/aarch64/xlat_tables_arch.c b/lib/xlat_tables_v2/aarch64/xlat_tables_arch.c
index 4e4292f..cf8070a 100644
--- a/lib/xlat_tables_v2/aarch64/xlat_tables_arch.c
+++ b/lib/xlat_tables_v2/aarch64/xlat_tables_arch.c
@@ -112,6 +112,17 @@
 	}
 }
 
+bool is_dcache_enabled(void)
+{
+	unsigned int el = (unsigned int)GET_EL(read_CurrentEl());
+
+	if (el == 1U) {
+		return (read_sctlr_el1() & SCTLR_C_BIT) != 0U;
+	} else {
+		return (read_sctlr_el3() & SCTLR_C_BIT) != 0U;
+	}
+}
+
 uint64_t xlat_arch_regime_get_xn_desc(int xlat_regime)
 {
 	if (xlat_regime == EL1_EL0_REGIME) {
diff --git a/lib/xlat_tables_v2/xlat_tables_core.c b/lib/xlat_tables_v2/xlat_tables_core.c
index 54e5834..56b9514 100644
--- a/lib/xlat_tables_v2/xlat_tables_core.c
+++ b/lib/xlat_tables_v2/xlat_tables_core.c
@@ -18,6 +18,13 @@
 
 #include "xlat_tables_private.h"
 
+/* Helper function that cleans the data cache only if it is enabled. */
+static inline void xlat_clean_dcache_range(uintptr_t addr, size_t size)
+{
+	if (is_dcache_enabled())
+		clean_dcache_range(addr, size);
+}
+
 #if PLAT_XLAT_TABLES_DYNAMIC
 
 /*
@@ -329,7 +336,10 @@
 			xlat_tables_unmap_region(ctx, mm, table_idx_va,
 						 subtable, XLAT_TABLE_ENTRIES,
 						 level + 1U);
-
+#if !(HW_ASSISTED_COHERENCY || WARMBOOT_ENABLE_DCACHE_EARLY)
+			xlat_clean_dcache_range((uintptr_t)subtable,
+				XLAT_TABLE_ENTRIES * sizeof(uint64_t));
+#endif
 			/*
 			 * If the subtable is now empty, remove its reference.
 			 */
@@ -563,6 +573,10 @@
 			end_va = xlat_tables_map_region(ctx, mm, table_idx_va,
 					       subtable, XLAT_TABLE_ENTRIES,
 					       level + 1U);
+#if !(HW_ASSISTED_COHERENCY || WARMBOOT_ENABLE_DCACHE_EARLY)
+			xlat_clean_dcache_range((uintptr_t)subtable,
+				XLAT_TABLE_ENTRIES * sizeof(uint64_t));
+#endif
 			if (end_va !=
 				(table_idx_va + XLAT_BLOCK_SIZE(level) - 1U))
 				return end_va;
@@ -575,6 +589,10 @@
 			end_va = xlat_tables_map_region(ctx, mm, table_idx_va,
 					       subtable, XLAT_TABLE_ENTRIES,
 					       level + 1U);
+#if !(HW_ASSISTED_COHERENCY || WARMBOOT_ENABLE_DCACHE_EARLY)
+			xlat_clean_dcache_range((uintptr_t)subtable,
+				XLAT_TABLE_ENTRIES * sizeof(uint64_t));
+#endif
 			if (end_va !=
 				(table_idx_va + XLAT_BLOCK_SIZE(level) - 1U))
 				return end_va;
@@ -859,7 +877,10 @@
 		end_va = xlat_tables_map_region(ctx, mm_cursor,
 				0U, ctx->base_table, ctx->base_table_entries,
 				ctx->base_level);
-
+#if !(HW_ASSISTED_COHERENCY || WARMBOOT_ENABLE_DCACHE_EARLY)
+		xlat_clean_dcache_range((uintptr_t)ctx->base_table,
+				   ctx->base_table_entries * sizeof(uint64_t));
+#endif
 		/* Failed to map, remove mmap entry, unmap and return error. */
 		if (end_va != (mm_cursor->base_va + mm_cursor->size - 1U)) {
 			(void)memmove(mm_cursor, mm_cursor + 1U,
@@ -885,7 +906,10 @@
 			xlat_tables_unmap_region(ctx, &unmap_mm, 0U,
 				ctx->base_table, ctx->base_table_entries,
 				ctx->base_level);
-
+#if !(HW_ASSISTED_COHERENCY || WARMBOOT_ENABLE_DCACHE_EARLY)
+			xlat_clean_dcache_range((uintptr_t)ctx->base_table,
+				ctx->base_table_entries * sizeof(uint64_t));
+#endif
 			return -ENOMEM;
 		}
 
@@ -951,6 +975,10 @@
 		xlat_tables_unmap_region(ctx, mm, 0U, ctx->base_table,
 					 ctx->base_table_entries,
 					 ctx->base_level);
+#if !(HW_ASSISTED_COHERENCY || WARMBOOT_ENABLE_DCACHE_EARLY)
+		xlat_clean_dcache_range((uintptr_t)ctx->base_table,
+			ctx->base_table_entries * sizeof(uint64_t));
+#endif
 		xlat_arch_tlbi_va_sync();
 	}
 
@@ -1012,7 +1040,10 @@
 		uintptr_t end_va = xlat_tables_map_region(ctx, mm, 0U,
 				ctx->base_table, ctx->base_table_entries,
 				ctx->base_level);
-
+#if !(HW_ASSISTED_COHERENCY || WARMBOOT_ENABLE_DCACHE_EARLY)
+		xlat_clean_dcache_range((uintptr_t)ctx->base_table,
+				   ctx->base_table_entries * sizeof(uint64_t));
+#endif
 		if (end_va != (mm->base_va + mm->size - 1U)) {
 			ERROR("Not enough memory to map region:\n"
 			      " VA:0x%lx  PA:0x%llx  size:0x%zx  attr:0x%x\n",
diff --git a/lib/xlat_tables_v2/xlat_tables_private.h b/lib/xlat_tables_v2/xlat_tables_private.h
index 313bd8d..528996a 100644
--- a/lib/xlat_tables_v2/xlat_tables_private.h
+++ b/lib/xlat_tables_v2/xlat_tables_private.h
@@ -97,4 +97,7 @@
  */
 bool is_mmu_enabled_ctx(const xlat_ctx_t *ctx);
 
+/* Returns true if the data cache is enabled at the current EL. */
+bool is_dcache_enabled(void);
+
 #endif /* XLAT_TABLES_PRIVATE_H */
diff --git a/lib/xlat_tables_v2/xlat_tables_utils.c b/lib/xlat_tables_v2/xlat_tables_utils.c
index 978a506..8cad348 100644
--- a/lib/xlat_tables_v2/xlat_tables_utils.c
+++ b/lib/xlat_tables_v2/xlat_tables_utils.c
@@ -539,7 +539,9 @@
 		 * before writing the new descriptor.
 		 */
 		*entry = INVALID_DESC;
-
+#if !(HW_ASSISTED_COHERENCY || WARMBOOT_ENABLE_DCACHE_EARLY)
+		dccvac((uintptr_t)entry);
+#endif
 		/* Invalidate any cached copy of this mapping in the TLBs. */
 		xlat_arch_tlbi_va(base_va, ctx->xlat_regime);
 
@@ -548,7 +550,9 @@
 
 		/* Write new descriptor */
 		*entry = xlat_desc(ctx, new_attr, addr_pa, level);
-
+#if !(HW_ASSISTED_COHERENCY || WARMBOOT_ENABLE_DCACHE_EARLY)
+		dccvac((uintptr_t)entry);
+#endif
 		base_va += PAGE_SIZE;
 	}