fix(intel): add cache invalidation during BL31 initialization

During warm boot, U-Boot proper invalidates (and cleans) the
data cache before enabling it. This invalidation (+ cleaning)
causes stale values held in the cache to be written back to,
and thus synced with, main memory.
So, before the cache cleaning is done in U-Boot proper, the
data cache is invalidated in BL31, so that the cache contents
are in sync with the U-Boot proper memory address space when
U-Boot proper does its initialization, which in turn clears
its BSS and heap sections.

Change-Id: Ic8d8672f1e371868be7f54f5a1fae9229ab15164
Signed-off-by: Tanmay Kathpalia <tanmay.kathpalia@intel.com>
Signed-off-by: Jit Loon Lim <jit.loon.lim@intel.com>
diff --git a/plat/intel/soc/agilex5/bl31_plat_setup.c b/plat/intel/soc/agilex5/bl31_plat_setup.c
index 8d3928f..96c4161 100644
--- a/plat/intel/soc/agilex5/bl31_plat_setup.c
+++ b/plat/intel/soc/agilex5/bl31_plat_setup.c
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2019-2024, ARM Limited and Contributors. All rights reserved.
  * Copyright (c) 2019-2023, Intel Corporation. All rights reserved.
+ * Copyright (c) 2024, Altera Corporation. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -17,6 +18,7 @@
 #include <lib/xlat_tables/xlat_tables_v2.h>
 #include <plat/common/platform.h>
 
+#include "agilex5_cache.h"
 #include "agilex5_power_manager.h"
 #include "ccu/ncore_ccu.h"
 #include "socfpga_mailbox.h"
@@ -193,7 +195,8 @@
 	boot_core = (mmio_read_32(AGX5_PWRMGR(MPU_BOOTCONFIG)) & 0xC00);
 	NOTICE("BL31: Boot Core = %x\n", boot_core);
 	NOTICE("BL31: CPU ID = %x\n", cpuid);
-
+	INFO("BL31: Invalidate Data cache\n");
+	invalidate_dcache_all();
 }
 
 /* Get non-secure image entrypoint for BL33. Zephyr and Linux */
diff --git a/plat/intel/soc/agilex5/include/agilex5_cache.h b/plat/intel/soc/agilex5/include/agilex5_cache.h
new file mode 100644
index 0000000..095d99e
--- /dev/null
+++ b/plat/intel/soc/agilex5/include/agilex5_cache.h
@@ -0,0 +1,12 @@
+/*
+ * Copyright (c) 2024, Altera Corporation. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#ifndef AGX5_CACHE_H
+#define AGX5_CACHE_H
+
+void invalidate_dcache_all(void);	/* invalidate (no clean) data caches by set/way */
+
+#endif /* AGX5_CACHE_H */
diff --git a/plat/intel/soc/agilex5/platform.mk b/plat/intel/soc/agilex5/platform.mk
index 409c7b1..90678e1 100644
--- a/plat/intel/soc/agilex5/platform.mk
+++ b/plat/intel/soc/agilex5/platform.mk
@@ -87,6 +87,7 @@
 		lib/cpus/aarch64/cortex_a76.S				\
 		plat/common/plat_psci_common.c				\
 		plat/intel/soc/agilex5/bl31_plat_setup.c		\
+		plat/intel/soc/agilex5/soc/agilex5_cache.S		\
 		plat/intel/soc/agilex5/soc/agilex5_clock_manager.c	\
 		plat/intel/soc/agilex5/soc/agilex5_power_manager.c	\
 		plat/intel/soc/common/socfpga_psci.c			\
diff --git a/plat/intel/soc/agilex5/soc/agilex5_cache.S b/plat/intel/soc/agilex5/soc/agilex5_cache.S
new file mode 100644
index 0000000..a174386
--- /dev/null
+++ b/plat/intel/soc/agilex5/soc/agilex5_cache.S
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2024, Altera Corporation. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+#include <arch.h>
+#include <asm_macros.S>
+#include <cpu_macros.S>
+#include <plat_macros.S>
+
+	.globl invalidate_dcache_all
+
+.pushsection .text.asm_dcache_level, "ax"
+func asm_dcache_level
+	lsl	x12, x0, #1		/* x12 <- cache level (x0) << 1 */
+	msr	csselr_el1, x12		/* select cache level */
+	isb				/* sync change of ccsidr_el1 */
+	mrs	x6, ccsidr_el1		/* read the new ccsidr_el1 */
+	ubfx	x2, x6,  #0,  #3	/* x2 <- log2(cache line size)-4 */
+	ubfx	x3, x6,  #3, #10	/* x3 <- number of cache ways - 1 */
+	ubfx	x4, x6, #13, #15	/* x4 <- number of cache sets - 1 */
+	add	x2, x2, #4		/* x2 <- log2(cache line size) */
+	clz	w5, w3			/* bit position of #ways */
+	/* x12 <- cache level << 1 */
+	/* x2 <- line length offset */
+	/* x3 <- number of cache ways - 1 */
+	/* x4 <- number of cache sets - 1 */
+	/* x5 <- bit position of #ways */
+
+loop_set:
+	mov	x6, x3			/* x6 <- working copy of #ways - 1 */
+loop_way:
+	lsl	x7, x6, x5		/* x7 <- way number << x5 */
+	orr	x9, x12, x7		/* map way and level to cisw value */
+	lsl	x7, x4, x2		/* x7 <- set number << x2 */
+	orr	x9, x9, x7		/* map set number to cisw value */
+	tbz	w1, #0, 1f		/* x1 bit 0 clear: clean & invalidate */
+	dc	isw, x9			/* invalidate only by set/way */
+	b	2f			/* skip the clean & invalidate op */
+1:	dc	cisw, x9		/* clean & invalidate by set/way */
+2:	subs	x6, x6, #1		/* decrement the way */
+	b.ge	loop_way		/* repeat until way goes negative */
+	subs	x4, x4, #1		/* decrement the set */
+	b.ge	loop_set		/* repeat until set goes negative */
+
+	ret
+endfunc asm_dcache_level
+.popsection
+
+/*
+ * void asm_dcache_all(int invalidate_only)
+ *
+ * x0: 0 clean & invalidate, 1 invalidate only
+ *
+ * Clean & invalidate, or invalidate only, each data cache level by set/way.
+ */
+.pushsection .text.asm_dcache_all, "ax"
+func asm_dcache_all
+	mov	x1, x0			/* x1 <- invalidate_only flag */
+	dsb	sy			/* barrier before cache maintenance */
+	mrs	x10, clidr_el1		/* read clidr_el1 */
+	ubfx	x11, x10, #24, #3	/* x11 <- loc */
+	cbz	x11, finished		/* if loc is 0, exit */
+	mov	x15, x30		/* save lr: bl below overwrites x30 */
+	mov	x0, #0			/* start flush at cache level 0 */
+	/* x0  <- cache level */
+	/* x10 <- clidr_el1 */
+	/* x11 <- loc */
+	/* x15 <- return address */
+
+loop_level:
+	add	x12, x0, x0, lsl #1	/* x12 <- tripled cache level */
+	lsr	x12, x10, x12		/* move this level's field to bit 0 */
+	and	x12, x12, #7		/* x12 <- cache type */
+	cmp	x12, #2
+	b.lt	skip			/* skip if no cache or icache */
+	bl	asm_dcache_level	/* x1 = 0 flush, 1 invalidate */
+skip:
+	add	x0, x0, #1		/* increment cache level */
+	cmp	x11, x0			/* levels remaining below loc? */
+	b.gt	loop_level
+
+	mov	x0, #0
+	msr	csselr_el1, x0		/* restore csselr_el1 */
+	dsb	sy			/* barrier after cache maintenance */
+	isb
+	mov	x30, x15		/* restore return address */
+
+finished:
+	ret
+endfunc asm_dcache_all
+.popsection
+
+.pushsection .text.invalidate_dcache_all, "ax"
+func invalidate_dcache_all
+	mov	x0, #0x1		/* x0 <- 1: invalidate only */
+	b	asm_dcache_all		/* tail call: its ret returns to our caller */
+endfunc invalidate_dcache_all
+.popsection