xlat v2: Split MMU setup and enable

At present, the function provided by the translation library to enable
the MMU constructs appropriate values for the translation registers,
and programs them into those registers. The construction of the initial
values, however, is only required once, as both the primary and the
secondaries program the same values.

Additionally, the MMU-enabling function is written in C, which means
there's an active stack at the time the MMU is enabled. On some
systems, such as Arm DynamIQ, having an active stack while enabling the
MMU during warm boot might lead to coherency problems.

This patch addresses both the above problems by:

  - Splitting the MMU-enabling function into two: one that sets up the
    values to be programmed into the registers, and another that takes
    the pre-computed values and writes them to the appropriate
    registers. With this, the primary CPU effectively calls both
    functions to have the MMU enabled, while secondaries only need to
    call the latter (see the sketch below).

  - Rewriting the function that enables the MMU in assembly so that it
    doesn't use the stack.

This patch also fixes a number of MISRA issues along the way.
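
A rough sketch of the resulting call pattern (illustrative only: the
boot-path function names and the flag/argument values below are
hypothetical and not part of this patch):

  void primary_cpu_enable_mmu(void)
  {
          /* Compute MAIR, TCR and TTBR0 once into mmu_cfg_params. */
          setup_mmu_cfg(0U, base_xlat_table, max_pa, max_va);

          /* Program the stashed values and turn the MMU on. */
          enable_mmu_direct_el3(0U);
  }

  void secondary_cpu_enable_mmu(void)
  {
          /* Reuse the values already computed by the primary. */
          enable_mmu_direct_el3(0U);
  }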

Change-Id: I0faca97263a970ffe765f0e731a1417e43fbfc45
Signed-off-by: Jeenu Viswambharan <jeenu.viswambharan@arm.com>
diff --git a/lib/xlat_tables_v2/aarch64/enable_mmu.S b/lib/xlat_tables_v2/aarch64/enable_mmu.S
new file mode 100644
index 0000000..a72c7fa
--- /dev/null
+++ b/lib/xlat_tables_v2/aarch64/enable_mmu.S
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2018, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <asm_macros.S>
+#include <assert_macros.S>
+#include <xlat_tables_v2.h>
+
+	.global	enable_mmu_direct_el1
+	.global	enable_mmu_direct_el3
+
+	/* Macros to read and write a system register for a given EL. */
+	.macro _msr reg_name, el, gp_reg
+	msr	\reg_name\()_el\()\el, \gp_reg
+	.endm
+
+	.macro _mrs gp_reg, reg_name, el
+	mrs	\gp_reg, \reg_name\()_el\()\el
+	.endm
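+
+	/*
+	 * For example, "_msr sctlr, 3, x4" expands to "msr sctlr_el3, x4",
+	 * and "_mrs x1, sctlr, 1" expands to "mrs x1, sctlr_el1".
+	 */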
+
+	.macro define_mmu_enable_func el
+	func enable_mmu_direct_\()el\el
+#if ENABLE_ASSERTIONS
+		_mrs	x1, sctlr, \el
+		tst	x1, #SCTLR_M_BIT
+		ASM_ASSERT(eq)
+#endif
+
+		/* Invalidate TLB entries */
+		.if \el == 1
+		TLB_INVALIDATE(vmalle1)
+		.else
+		.if \el == 3
+		TLB_INVALIDATE(alle3)
+		.else
+		.error "EL must be 1 or 3"
+		.endif
+		.endif
+
+		mov	x7, x0
+		ldr	x0, =mmu_cfg_params
+
+		/* MAIR */
+		ldr	w1, [x0, #(MMU_CFG_MAIR0 << 2)]
+		_msr	mair, \el, x1
+
+		/* TCR */
+		ldr	w2, [x0, #(MMU_CFG_TCR << 2)]
+		_msr	tcr, \el, x2
+
+		/* TTBR */
+		ldr	w3, [x0, #(MMU_CFG_TTBR0_LO << 2)]
+		ldr	w4, [x0, #(MMU_CFG_TTBR0_HI << 2)]
+		orr	x3, x3, x4, lsl #32
+		_msr	ttbr0, \el, x3
+
+		/*
+		 * Ensure all translation table writes have drained into
+		 * memory, the TLB invalidation is complete, and translation
+		 * register writes are committed before enabling the MMU.
+		 */
+		dsb	ish
+		isb
+
+		/* Set and clear required fields of SCTLR */
+		_mrs	x4, sctlr, \el
+		mov_imm	x5, SCTLR_WXN_BIT | SCTLR_C_BIT | SCTLR_M_BIT
+		orr	x4, x4, x5
+
+		/* Additionally, amend SCTLR fields based on flags */
+		bic	x5, x4, #SCTLR_C_BIT
+		tst	x7, #DISABLE_DCACHE
+		csel	x4, x5, x4, ne
+
+		_msr	sctlr, \el, x4
+		isb
+
+		ret
+	endfunc enable_mmu_direct_\()el\el
+	.endm
+
+	/*
+	 * Define MMU-enabling functions for EL1 and EL3:
+	 *
+	 *  enable_mmu_direct_el1
+	 *  enable_mmu_direct_el3
+	 */
+	define_mmu_enable_func 1
+	define_mmu_enable_func 3
diff --git a/lib/xlat_tables_v2/aarch64/xlat_tables_arch.c b/lib/xlat_tables_v2/aarch64/xlat_tables_arch.c
index c501e70..71b9c8f 100644
--- a/lib/xlat_tables_v2/aarch64/xlat_tables_arch.c
+++ b/lib/xlat_tables_v2/aarch64/xlat_tables_arch.c
@@ -16,6 +16,8 @@
 #include <xlat_tables_v2.h>
 #include "../xlat_tables_private.h"
 
+uint32_t mmu_cfg_params[MMU_CFG_PARAM_MAX];
+
 /*
  * Returns 1 if the provided granule size is supported, 0 otherwise.
  */
@@ -183,70 +185,13 @@
 	return el;
 }
 
-/*******************************************************************************
- * Macro generating the code for the function enabling the MMU in the given
- * exception level, assuming that the pagetables have already been created.
- *
- *   _el:		Exception level at which the function will run
- *   _tlbi_fct:		Function to invalidate the TLBs at the current
- *			exception level
- ******************************************************************************/
-#define DEFINE_ENABLE_MMU_EL(_el, _tlbi_fct)				\
-	static void enable_mmu_internal_el##_el(int flags,		\
-						uint64_t mair,		\
-						uint64_t tcr,		\
-						uint64_t ttbr)		\
-	{								\
-		uint32_t sctlr = read_sctlr_el##_el();			\
-		assert((sctlr & SCTLR_M_BIT) == 0);			\
-									\
-		/* Invalidate TLBs at the current exception level */	\
-		_tlbi_fct();						\
-									\
-		write_mair_el##_el(mair);				\
-		write_tcr_el##_el(tcr);					\
-									\
-		/* Set TTBR bits as well */				\
-		if (ARM_ARCH_AT_LEAST(8, 2)) {				\
-			/* Enable CnP bit so as to share page tables */	\
-			/* with all PEs. This is mandatory for */	\
-			/* ARMv8.2 implementations. */			\
-			ttbr |= TTBR_CNP_BIT;				\
-		}							\
-		write_ttbr0_el##_el(ttbr);				\
-									\
-		/* Ensure all translation table writes have drained */	\
-		/* into memory, the TLB invalidation is complete, */	\
-		/* and translation register writes are committed */	\
-		/* before enabling the MMU */				\
-		dsbish();						\
-		isb();							\
-									\
-		sctlr |= SCTLR_WXN_BIT | SCTLR_M_BIT;			\
-		if (flags & DISABLE_DCACHE)				\
-			sctlr &= ~SCTLR_C_BIT;				\
-		else							\
-			sctlr |= SCTLR_C_BIT;				\
-									\
-		write_sctlr_el##_el(sctlr);				\
-									\
-		/* Ensure the MMU enable takes effect immediately */	\
-		isb();							\
-	}
-
-/* Define EL1 and EL3 variants of the function enabling the MMU */
-#if IMAGE_EL == 1
-DEFINE_ENABLE_MMU_EL(1, tlbivmalle1)
-#elif IMAGE_EL == 3
-DEFINE_ENABLE_MMU_EL(3, tlbialle3)
-#endif
-
-void enable_mmu_arch(unsigned int flags,
-		uint64_t *base_table,
+void setup_mmu_cfg(unsigned int flags,
+		const uint64_t *base_table,
 		unsigned long long max_pa,
 		uintptr_t max_va)
 {
 	uint64_t mair, ttbr, tcr;
+	uintptr_t virtual_addr_space_size;
 
 	/* Set attributes in the right indices of the MAIR. */
 	mair = MAIR_ATTR_SET(ATTR_DEVICE, ATTR_DEVICE_INDEX);
@@ -256,27 +201,25 @@
 	ttbr = (uint64_t) base_table;
 
 	/*
-	 * Set TCR bits as well.
-	 */
-
-	/*
 	 * Limit the input address ranges and memory region sizes translated
 	 * using TTBR0 to the given virtual address space size.
 	 */
-	assert(max_va < UINTPTR_MAX);
-	uintptr_t virtual_addr_space_size = max_va + 1;
+	assert(max_va < ((uint64_t) UINTPTR_MAX));
+
+	virtual_addr_space_size = max_va + 1;
 	assert(CHECK_VIRT_ADDR_SPACE_SIZE(virtual_addr_space_size));
+
 	/*
 	 * __builtin_ctzll(0) is undefined but here we are guaranteed that
 	 * virtual_addr_space_size is in the range [1,UINTPTR_MAX].
 	 */
-	tcr = 64 - __builtin_ctzll(virtual_addr_space_size);
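+	/*
+	 * Worked example (illustrative): a 4GB (2^32) virtual address
+	 * space gives 64 - 32 = 32, i.e. T0SZ = 32.
+	 */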
+	tcr = (uint64_t) 64 - __builtin_ctzll(virtual_addr_space_size);
 
 	/*
 	 * Set the cacheability and shareability attributes for memory
 	 * associated with translation table walks.
 	 */
-	if (flags & XLAT_TABLE_NC) {
+	if ((flags & XLAT_TABLE_NC) != 0) {
 		/* Inner & outer non-cacheable non-shareable. */
 		tcr |= TCR_SH_NON_SHAREABLE |
 			TCR_RGN_OUTER_NC | TCR_RGN_INNER_NC;
@@ -299,10 +242,23 @@
 	 * translated using TTBR1_EL1.
 	 */
 	tcr |= TCR_EPD1_BIT | (tcr_ps_bits << TCR_EL1_IPS_SHIFT);
-	enable_mmu_internal_el1(flags, mair, tcr, ttbr);
 #elif IMAGE_EL == 3
 	assert(IS_IN_EL(3));
 	tcr |= TCR_EL3_RES1 | (tcr_ps_bits << TCR_EL3_PS_SHIFT);
-	enable_mmu_internal_el3(flags, mair, tcr, ttbr);
 #endif
+
+	mmu_cfg_params[MMU_CFG_MAIR0] = (uint32_t) mair;
+	mmu_cfg_params[MMU_CFG_TCR] = (uint32_t) tcr;
+
+	/* Set TTBR bits as well */
+	if (ARM_ARCH_AT_LEAST(8, 2)) {
+		/*
+		 * Enable CnP bit so as to share page tables with all PEs. This
+		 * is mandatory for ARMv8.2 implementations.
+		 */
+		ttbr |= TTBR_CNP_BIT;
+	}
+
+	mmu_cfg_params[MMU_CFG_TTBR0_LO] = (uint32_t) ttbr;
+	mmu_cfg_params[MMU_CFG_TTBR0_HI] = (uint32_t) (ttbr >> 32);
 }
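
The MMU_CFG_* accessors above treat mmu_cfg_params as an array of
32-bit values, hence the "<< 2" byte-offset scaling in the assembly.
The actual definitions live in xlat_tables_v2.h, which this excerpt
does not show; a sketch of what they imply:

  /* Indices into mmu_cfg_params[]; each parameter is 32 bits wide. */
  #define MMU_CFG_MAIR0		0
  #define MMU_CFG_TCR		1
  #define MMU_CFG_TTBR0_LO	2
  #define MMU_CFG_TTBR0_HI	3
  #define MMU_CFG_PARAM_MAX	4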