FVP_Base_AEMv8A platform: Fix cache maintenance operations

This patch fixes a hang of the FVP_Base_AEMv8A model when it is
configured as ARMv8.4+ with cache modelling enabled.
The incorrect L1 cache flush to PoU, which used the CLIDR_EL1
LoUIS field, is replaced with L1 to L2 and L2 to L3 (if L3 is
present) cache flushes. The architecture requires the LoUIS field
to be zero for ARMv8.4-A with the ARMv8.4-S2FWB feature.
The FVP_Base_AEMv8A model can be configured with L3 enabled by
setting `cluster0.l3cache-size` and `cluster1.l3cache-size`
to non-zero values. The presence of L3 is checked in the
`aem_generic_core_pwr_dwn` function by reading the
CLIDR_EL1.Ctype3 field.

Change-Id: If3de3d4eb5ed409e5b4ccdbc2fe6d5a01894a9af
Signed-off-by: Alexei Fedorov <Alexei.Fedorov@arm.com>
diff --git a/lib/cpus/aarch64/aem_generic.S b/lib/cpus/aarch64/aem_generic.S
index 51b5ce9..6291e43 100644
--- a/lib/cpus/aarch64/aem_generic.S
+++ b/lib/cpus/aarch64/aem_generic.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2014-2019, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -18,15 +18,43 @@
 	msr	sctlr_el3, x1
 	isb
 
+	/* ---------------------------------------------
+	 * AEM model supports L3 caches in which case L2
+	 * will be private per core caches and flush
+	 * from L1 to L2 is not sufficient.
+	 * ---------------------------------------------
+	 */
+	mrs	x1, clidr_el1
+
+	/* ---------------------------------------------
+	 * Check if L3 cache is implemented.
+	 * ---------------------------------------------
+	 */
+	tst	x1, ((1 << CLIDR_FIELD_WIDTH) - 1) << CTYPE_SHIFT(3)
+
+	/* ---------------------------------------------
+	 * There is no L3 cache, flush L1 to L2 only.
+	 * ---------------------------------------------
+	 */
 	mov	x0, #DCCISW
+	b.eq	dcsw_op_level1
+
+	mov	x18, x30
 
 	/* ---------------------------------------------
-	 * Flush L1 cache to PoU.
+	 * Flush L1 cache to L2.
 	 * ---------------------------------------------
 	 */
-	b	dcsw_op_louis
-endfunc aem_generic_core_pwr_dwn
+	bl	dcsw_op_level1
+	mov	x30, x18
 
+	/* ---------------------------------------------
+	 * Flush L2 cache to L3.
+	 * ---------------------------------------------
+	 */
+	mov	x0, #DCCISW
+	b	dcsw_op_level2
+endfunc aem_generic_core_pwr_dwn
 
 func aem_generic_cluster_pwr_dwn
 	/* ---------------------------------------------
@@ -39,7 +67,7 @@
 	isb
 
 	/* ---------------------------------------------
-	 * Flush L1 and L2 caches to PoC.
+	 * Flush all caches to PoC.
 	 * ---------------------------------------------
 	 */
 	mov	x0, #DCCISW